Skip to content

Commit

Permalink
Merge pull request #6 from ssciwr/from-csv
Browse files Browse the repository at this point in the history
Add from_csv method that restores previously dumped distances
  • Loading branch information
dokempf authored Nov 25, 2020
2 parents 92d6f74 + 181c55f commit 2a993db
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
24 changes: 23 additions & 1 deletion src/hamming.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include"hamming_impl.hh"

#include<algorithm>
#include<array>
#include<fstream>
#include<iostream>
#include<sstream>
#include<stdexcept>
Expand All @@ -19,7 +20,28 @@ DataSet::DataSet(const std::vector<std::string>& data_)

/// Restore a DataSet from a CSV file.
///
/// The file is read in two passes. The first pass counts the lines to
/// determine `nsamples`; the second pass parses, for the line with 0-based
/// index r, its first r comma-separated fields as integers and appends them
/// to `result` in reading order. Any further fields on a line are ignored.
///
/// @param filename Path of the CSV file to read.
/// @throws std::runtime_error if the file cannot be opened, if a line has
///         fewer fields than required, or if the second pass does not see
///         the same number of lines as the first.
/// @throws std::invalid_argument, std::out_of_range propagated from
///         std::stoi when a field is not a valid integer.
DataSet::DataSet(const std::string& filename)
{
  std::ifstream stream(filename);
  if(!stream)
    throw std::runtime_error("Could not open file: " + filename);

  // Determine the dataset size. Counting via getline (instead of counting
  // '\n' characters) also accounts for a last line without trailing newline.
  std::string line;
  nsamples = 0;
  while(std::getline(stream, line))
    ++nsamples;

  // NOTE(review): the loop below fills only nsamples * (nsamples - 1) / 2
  // entries; the allocation size is kept unchanged from the previous
  // implementation - confirm whether the extra nsamples entries are intended.
  result.resize(nsamples * (nsamples + 1) / 2);

  // Rewind for the second pass; clear() is needed to reset the EOF state.
  stream.clear();
  stream.seekg(0);

  // Read the data
  std::size_t pos{0};
  std::size_t row{0};
  // Bounding by nsamples guarantees that we never write past the end of
  // result, even if the file grew between the two passes.
  while(row < nsamples && std::getline(stream, line))
  {
    std::istringstream fields(line);
    std::string field;
    for(std::size_t col=0; col<row; ++col)
    {
      // A failed extraction would leave `field` untouched, so it must be
      // detected here instead of silently parsing a stale value.
      if(!std::getline(fields, field, ','))
        throw std::runtime_error("Too few entries in line " + std::to_string(row + 1) + " of file: " + filename);
      result[pos++] = std::stoi(field);
    }
    ++row;
  }

  if(row != nsamples)
    throw std::runtime_error("Could not re-read all lines of file: " + filename);
}

void DataSet::dump(const std::string& filename)
Expand Down
21 changes: 21 additions & 0 deletions src/hamming_t.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,24 @@ TEST_CASE("invalid input data: inconsistent sequence lengths", "[invalid]") {
REQUIRE_THROWS_WITH(from_stringlist(v), msg);
}
}

// Round trip: build a DataSet from generated strings, dump it to a temporary
// file, restore it with from_csv and compare every (i, j) entry.
TEST_CASE("from_csv reproduces correct data", "[hamming]") {
  // Fixed seed so that the generated input is the same on every run.
  std::mt19937 gen(12345);
  std::vector<std::string> data(10);
  for(auto& d : data)
    d = make_test_string(1000, gen);

  DataSet ref(data);
  char tmp_file_name[L_tmpnam];
  REQUIRE(std::tmpnam(tmp_file_name) != nullptr);

  // Remove the temporary file on every exit path. A failing REQUIRE aborts
  // the test case, so a trailing std::remove call would be skipped and the
  // file would be left behind.
  struct TmpFileRemover {
    const char* path;
    ~TmpFileRemover() { std::remove(path); }
  } cleanup{tmp_file_name};

  ref.dump(std::string(tmp_file_name));

  auto restore = from_csv(std::string(tmp_file_name));
  REQUIRE(ref.nsamples == restore.nsamples);
  for(std::size_t i=0; i<ref.nsamples; ++i) {
    for(std::size_t j=0; j<ref.nsamples; ++j) {
      REQUIRE(ref[{i, j}] == restore[{i, j}]);
    }
  }
}

0 comments on commit 2a993db

Please sign in to comment.