Review note: the added source lines below were corrected during review, so the
blob hashes on the "index" lines still describe the pre-review file contents.

diff --git a/ArrayStats.cpp b/ArrayStats.cpp
new file mode 100644
index 0000000..eb7d266
--- /dev/null
+++ b/ArrayStats.cpp
@@ -0,0 +1,11 @@
+#include "ArrayStats.h"
+
+// Only the non-templated pieces live here; the templates have to stay in the header.
+
+// Writes all four run lengths to std::cout, one per line.
+void ArrayStats::IncreasingDecreasing::print() const{
+    std::cout << "Increasing: " << increasing << '\n';
+    std::cout << "Strictly increasing: " << strictlyIncreasing << '\n';
+    std::cout << "Decreasing: " << decreasing << '\n';
+    std::cout << "Strictly decreasing: " << strictlyDecreasing << '\n';
+}
diff --git a/ArrayStats.h b/ArrayStats.h
new file mode 100644
index 0000000..62822da
--- /dev/null
+++ b/ArrayStats.h
@@ -0,0 +1,222 @@
+#ifndef ARRAYSTATS_H
+#define ARRAYSTATS_H
+
+#include <algorithm>   // std::max, std::sort
+#include <cmath>       // std::sqrt
+#include <cstddef>     // std::size_t
+#include <iostream>    // std::cout
+#include <iterator>    // std::distance
+#include <type_traits> // std::remove_const, std::remove_reference
+#include <vector>      // scratch copy that gets sorted
+
+namespace ArrayStats{
+    // Longest run lengths, counted in elements, found in the input order.
+    // "increasing"/"decreasing" tolerate equal neighbours; the strict variants do not.
+    struct IncreasingDecreasing{
+        long int increasing = 0;
+        long int strictlyIncreasing = 0;
+        long int decreasing = 0;
+        long int strictlyDecreasing = 0;
+        IncreasingDecreasing(){}
+        IncreasingDecreasing(long int increasing, long int strictlyIncreasing, long int decreasing, long int strictlyDecreasing): increasing(increasing), strictlyIncreasing(strictlyIncreasing), decreasing(decreasing), strictlyDecreasing(strictlyDecreasing){}
+        // Writes all four run lengths to std::cout, one per line.
+        void print() const;
+    };
+
+    // Bit flags selecting which statistics getStats computes.
+    enum struct Flags : short{
+        MEAN = 1, //unordered
+        STDEVS = 1 << 1, //unordered, sample standard deviation
+        STDEVP = 1 << 2, //unordered, population standard deviation
+        MEDIAN = 1 << 3, //ordered; quickselect would be O(n), but it is trivial once sorted
+        MODE = 1 << 4, //ordered; a map would also work, but it is trivial once sorted
+        RANGE = 1 << 5, //unordered
+        MAX = 1 << 6, //unordered
+        MIN = 1 << 7, //unordered
+        INCREASING = 1 << 8, //unordered
+        STRICTLY_INCREASING = 1 << 9, //unordered
+        DECREASING = 1 << 10, //unordered
+        STRICTLY_DECREASING = 1 << 11, //unordered
+        SORTED_ALREADY = 1 << 12 //promise from the caller: the input is ascending, skip the sort
+    };
+    // Every statistic bit set; SORTED_ALREADY itself is not included.
+    inline constexpr short ALL_STATS = static_cast<short>(static_cast<short>(Flags::SORTED_ALREADY) - 1);
+
+    namespace detail{
+        // True when `flag` is set in the bitmask `flags`.
+        inline bool hasFlag(const short flags, const Flags flag){
+            return (flags & static_cast<short>(flag)) != 0;
+        }
+
+        // Advances one run counter by one element. A run that continues grows; a run
+        // that breaks is folded into `best` and restarts at 1 (the current element).
+        inline void stepRun(const bool enabled, const bool continues, long int& best, long int& running){
+            if(!enabled) return;
+            if(continues){
+                ++running;
+                return;
+            }
+            best = std::max(best, running);
+            running = 1;
+        }
+    }
+
+    // Feeds one adjacent pair (previous, current) into the run counters.
+    // `counter` holds the runs still in progress and `tracker` the longest finished
+    // runs; only the run kinds selected in `flags` are touched.
+    template <typename T>
+    void IncreasingDecreasingHandler(IncreasingDecreasing& tracker, IncreasingDecreasing& counter, const T& previous, const T& current, const short& flags = ALL_STATS){
+        const bool rising = current > previous;
+        const bool level = !rising && (current == previous);
+        const bool falling = !rising && !level;
+
+        detail::stepRun(detail::hasFlag(flags, Flags::INCREASING), rising || level, tracker.increasing, counter.increasing);
+        detail::stepRun(detail::hasFlag(flags, Flags::STRICTLY_INCREASING), rising, tracker.strictlyIncreasing, counter.strictlyIncreasing);
+        detail::stepRun(detail::hasFlag(flags, Flags::DECREASING), falling || level, tracker.decreasing, counter.decreasing);
+        detail::stepRun(detail::hasFlag(flags, Flags::STRICTLY_DECREASING), falling, tracker.strictlyDecreasing, counter.strictlyDecreasing);
+    }
+
+    // Result record filled by getStats. Fields that were not computed keep their
+    // initial values (0 for the long double fields, -1 for the T-typed ones).
+    template <typename T>
+    struct Stats{
+        long double mean = 0.0;
+        long double stdevs = 0.0; // sample standard deviation; stays 0 for a single element
+        long double stdevp = 0.0; // population standard deviation
+        T median = -1; // even counts: mean of the two middle values, converted to T
+        T mode = -1; // smallest of the most frequent values
+        std::size_t modeCount = 0; // how often `mode` occurs
+        T range = -1;
+        T max = -1;
+        T min = -1;
+        IncreasingDecreasing increasingDecreasing;
+
+        // Prints the statistics selected in `flags` to std::cout, one per line.
+        void print(const short& flags = ALL_STATS) const{
+            if(detail::hasFlag(flags, Flags::MEAN)) std::cout << "Mean (Average): " << mean << '\n';
+            if(detail::hasFlag(flags, Flags::STDEVP)) std::cout << "Standard Deviation (Population): " << stdevp << '\n';
+            if(detail::hasFlag(flags, Flags::STDEVS)) std::cout << "Standard Deviation (Sample): " << stdevs << '\n';
+            if(detail::hasFlag(flags, Flags::MEDIAN)) std::cout << "Median: " << median << '\n';
+            if(detail::hasFlag(flags, Flags::MODE)) std::cout << "Mode: " << mode << '\n';
+            if(detail::hasFlag(flags, Flags::RANGE)) std::cout << "Range: " << range << '\n';
+            if(detail::hasFlag(flags, Flags::MIN)) std::cout << "Min: " << min << '\n';
+            if(detail::hasFlag(flags, Flags::MAX)) std::cout << "Max: " << max << '\n';
+            if(detail::hasFlag(flags, Flags::INCREASING)) std::cout << "Increasing: " << increasingDecreasing.increasing << '\n';
+            if(detail::hasFlag(flags, Flags::STRICTLY_INCREASING)) std::cout << "Strictly increasing: " << increasingDecreasing.strictlyIncreasing << '\n';
+            if(detail::hasFlag(flags, Flags::DECREASING)) std::cout << "Decreasing: " << increasingDecreasing.decreasing << '\n';
+            if(detail::hasFlag(flags, Flags::STRICTLY_DECREASING)) std::cout << "Strictly decreasing: " << increasingDecreasing.strictlyDecreasing << '\n';
+        }
+    };
+
+    namespace detail{
+        // Fills the statistics that need ascending order (median, mode) from the
+        // `count` (> 0) ascending values starting at `sorted`.
+        template <typename Iterator, typename T>
+        void orderedStats(const Iterator sorted, const std::size_t count, const bool wantMedian, const bool wantMode, Stats<T>& stats){
+            if(wantMedian){
+                const std::size_t upper = count / 2;
+                if((count % 2) == 0){
+                    // Average in long double so the sum of two large integers cannot overflow.
+                    const long double below = static_cast<long double>(sorted[upper - 1]);
+                    const long double above = static_cast<long double>(sorted[upper]);
+                    stats.median = static_cast<T>((below + above) / 2.0L);
+                }else{
+                    stats.median = sorted[upper];
+                }
+            }
+
+            if(wantMode){
+                // Equal values are adjacent once sorted, so the mode is the longest run.
+                // The strict '>' keeps the first, i.e. smallest, value on ties.
+                stats.modeCount = 0;
+                std::size_t runStart = 0;
+                for(std::size_t i = 1; i <= count; ++i){
+                    if(i < count && sorted[i] == sorted[runStart]) continue;
+                    const std::size_t runLength = i - runStart;
+                    if(runLength > stats.modeCount){
+                        stats.mode = sorted[runStart];
+                        stats.modeCount = runLength;
+                    }
+                    runStart = i;
+                }
+            }
+        }
+    }
+
+    // Computes the statistics selected in `flags` for the range [begin, end).
+    // Median and mode work on a sorted copy unless SORTED_ALREADY is set, in which
+    // case the caller guarantees the range is ascending. The run statistics always
+    // follow the input order. An empty range returns a default-constructed Stats.
+    template <typename RandomAccessIterator>
+    auto getStats(const RandomAccessIterator begin, const RandomAccessIterator end, const short& flags = ALL_STATS) -> Stats<typename std::remove_const<typename std::remove_reference<decltype(*begin)>::type>::type>{
+        using T = typename std::remove_const<typename std::remove_reference<decltype(*begin)>::type>::type;
+        Stats<T> stats;
+        if(begin == end) return stats;
+
+        const bool wantStdevs = detail::hasFlag(flags, Flags::STDEVS);
+        const bool wantStdevp = detail::hasFlag(flags, Flags::STDEVP);
+        const bool wantMean = detail::hasFlag(flags, Flags::MEAN) || wantStdevs || wantStdevp; // both deviations need the mean
+        const bool wantMedian = detail::hasFlag(flags, Flags::MEDIAN);
+        const bool wantMode = detail::hasFlag(flags, Flags::MODE);
+        const bool wantRange = detail::hasFlag(flags, Flags::RANGE);
+        const bool wantMax = wantRange || detail::hasFlag(flags, Flags::MAX); // range = max - min
+        const bool wantMin = wantRange || detail::hasFlag(flags, Flags::MIN);
+        const bool wantRuns = detail::hasFlag(flags, Flags::INCREASING) || detail::hasFlag(flags, Flags::STRICTLY_INCREASING) || detail::hasFlag(flags, Flags::DECREASING) || detail::hasFlag(flags, Flags::STRICTLY_DECREASING);
+        const std::size_t count = static_cast<std::size_t>(std::distance(begin, end));
+
+        // With at least one element every run is 1 long and the mode occurs once.
+        stats.modeCount = 1;
+        stats.increasingDecreasing = IncreasingDecreasing(1, 1, 1, 1);
+        if(wantMax) stats.max = *begin;
+        if(wantMin) stats.min = *begin;
+
+        // Single pass in input order: sum, extremes and runs.
+        IncreasingDecreasing counter(1, 1, 1, 1);
+        for(std::size_t i = 0; i < count; ++i){
+            const T& value = begin[i];
+            if(wantMean) stats.mean += static_cast<long double>(value);
+            if(wantMax && value > stats.max) stats.max = value;
+            if(wantMin && value < stats.min) stats.min = value;
+            if(wantRuns && i > 0) IncreasingDecreasingHandler<T>(stats.increasingDecreasing, counter, begin[i - 1], value, flags);
+        }
+
+        // The runs still open at the end of the input compete with the finished ones.
+        IncreasingDecreasing& longest = stats.increasingDecreasing;
+        longest.increasing = std::max(longest.increasing, counter.increasing);
+        longest.strictlyIncreasing = std::max(longest.strictlyIncreasing, counter.strictlyIncreasing);
+        longest.decreasing = std::max(longest.decreasing, counter.decreasing);
+        longest.strictlyDecreasing = std::max(longest.strictlyDecreasing, counter.strictlyDecreasing);
+
+        if(wantRange) stats.range = static_cast<T>(stats.max - stats.min);
+
+        if(wantMean){
+            stats.mean /= static_cast<long double>(count);
+        }
+        if(wantStdevs || wantStdevp){
+            // Second pass: the deviations need the final mean.
+            long double squaredDeviations = 0.0L;
+            for(std::size_t i = 0; i < count; ++i){
+                const long double deviation = static_cast<long double>(begin[i]) - stats.mean;
+                squaredDeviations += deviation * deviation;
+            }
+            // n - 1 would be 0 for a single element, so stdevs keeps its 0 default there.
+            if(wantStdevs && count > 1) stats.stdevs = std::sqrt(squaredDeviations / static_cast<long double>(count - 1));
+            if(wantStdevp) stats.stdevp = std::sqrt(squaredDeviations / static_cast<long double>(count));
+        }
+
+        if(wantMedian || wantMode){
+            if(detail::hasFlag(flags, Flags::SORTED_ALREADY)){
+                detail::orderedStats(begin, count, wantMedian, wantMode, stats);
+            }else{
+                // std::vector owns the scratch copy, so nothing leaks if sorting throws.
+                std::vector<T> sortedCopy(begin, end);
+                std::sort(sortedCopy.begin(), sortedCopy.end());
+                detail::orderedStats(sortedCopy.cbegin(), count, wantMedian, wantMode, stats);
+            }
+        }
+
+        return stats;
+    }
+}
+
+#endif
diff --git a/README.md b/README.md
index 01d80ed..46274bd 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,29 @@
 # ArrayStats
 Get a bunch of stats about an array fast.
+
+## How to build the demo
+
+`make`
+
+## How to run the demo
+
+`./output`
+
+## How to use in your project
+
+`cp ArrayStats.h ArrayStats.cpp [DESTINATION_PATH]`
+
+Add `#include "ArrayStats.h"` to the file you want to use it in.
+
+Compile and link `ArrayStats.cpp` together with your own sources, e.g. `g++ -std=c++17 your_file.cpp ArrayStats.cpp`.
+
+## Nuance
+
+If you don't specify anything, `ArrayStats::getStats` will get all the stats. If you don't need all the stats, you can specify which ones you need using the flags. Just bitwise-or the flags you want (cast each `Flags` value to `short` first), and the program will do a little less work. You can also specify that your array is already sorted, so the sort is skipped.
+
+A good compiler will optimize the hell out of a function like this. A good enough compiler would even optimize out the whole program and just hard-code the output, because everything in the test main.cpp is constant.
+
+
+## Closing
+
+This can be done faster, because you can get the median in O(n) time with quickselect, but the idea is that if you need several stats that require a sorted array, like the mode (I know you can do it with maps), they can share the cost of a single sort.
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..b1d8e4c
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,26 @@
+/******************************************************************************
+
+Welcome to GDB Online.
+  GDB online is an online compiler and debugger tool for C, C++, Python, PHP, Ruby,
+  C#, OCaml, VB, Perl, Swift, Prolog, Javascript, Pascal, COBOL, HTML, CSS, JS
+  Code, Compile, Run and Debug online from anywhere in world.
+
+*******************************************************************************/
+#include <iostream>
+#include "ArrayStats.h"
+#include <vector>
+
+int main()
+{
+    const std::vector<int> list1 = {3, 8, 5, 1, 2, 7, 3, 4};
+
+    ArrayStats::getStats(list1.begin(), list1.end()).print();
+
+    std::cout << std::endl;
+
+    const std::vector<int> list2 = {4, 3, 1, 7, 3, 8};
+
+    ArrayStats::getStats(list2.begin(), list2.end()).print();
+
+    return 0;
+}
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..0a15d2c
--- /dev/null
+++ b/makefile
@@ -0,0 +1,18 @@
+output: ArrayStats.o main.o
+	g++ -std=c++17 -O2 ArrayStats.o main.o -o output
+
+ArrayStats.o: ArrayStats.cpp ArrayStats.h
+	g++ -std=c++17 -O2 -c ArrayStats.cpp
+
+main.o: main.cpp ArrayStats.h
+	g++ -std=c++17 -O2 -c main.cpp
+
+clean:
+	rm -f -v *.o output
+
+run:
+	./output
+
+debug:
+	g++ -std=c++17 -O2 -g ArrayStats.cpp main.cpp -o output
+