Skip to content

Commit

Permalink
Reviewed logic, added algorithms parameter, extended tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Anton committed Feb 7, 2024
1 parent 00e0454 commit 69fd19f
Show file tree
Hide file tree
Showing 14 changed files with 329 additions and 270 deletions.
30 changes: 17 additions & 13 deletions cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,35 @@

#include <CLI/CLI.hpp>

#include "core/DirectoryScan.h"
#include "core/DirectoryGroupScan.h"
#include "core/facade/FileGroupsScan.h"

void duplicatesSearch(const std::string & directoryPath) {
namespace {
const std::vector<std::string> standardAlgorithms = {"name", "size", "head", "hash"};
}

void duplicatesSearch(const std::string & directoryPath, const std::vector<std::string> & algorithms) {
std::cout << "Search duplicates in " << directoryPath << std::endl;

core::DirectoryScan directoryScan(std::cout);
auto duplicates = directoryScan.scan(directoryPath);
core::FileGroupsScan fileGroupsScan(std::cout, algorithms);
auto duplicates = fileGroupsScan.scan(directoryPath);

core::DirectoryGroupScan directoryGroupScan;
for (const auto & group : duplicates) {
directoryGroupScan.scan(group);
}

for (auto & group : directoryGroupScan.groups()) {
std::cout << group;
for (const auto & file : group) {
std::cout << file << "\n";
}
std::cout << "\n";
}
}

int main(int argc, char **argv) {
CLI::App app{"dsearch"};

std::string directory = std::filesystem::current_path().string();
auto duplicates = app.add_option("-d,--directory", directory);
std::vector<std::string> algorithms = standardAlgorithms;
app.add_option("-d,--directory", directory);
app.add_option("-als,--algorithms", algorithms);

CLI11_PARSE(app, argc, argv);

duplicatesSearch(directory);
duplicatesSearch(directory, algorithms);
}
5 changes: 2 additions & 3 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
cmake_minimum_required(VERSION 3.22.1 FATAL_ERROR)

add_library(core
DirectoryScan.cpp
DirectoryGroupScan.cpp

src/duplicate_search/Factory.cpp
src/facade/FileGroupsScan.cpp
)

target_include_directories(core
Expand Down
71 changes: 0 additions & 71 deletions core/DirectoryGroupScan.cpp

This file was deleted.

71 changes: 0 additions & 71 deletions core/DirectoryScan.cpp

This file was deleted.

27 changes: 0 additions & 27 deletions core/include/core/DirectoryGroupScan.h

This file was deleted.

9 changes: 9 additions & 0 deletions core/include/core/duplicate_search/Factory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

#include <memory>
#include <string>

#include "core/duplicate_search/DuplicateSearch.h"

namespace core {
/// @brief Creates the DuplicateSearch implementation selected by @p algorithm.
///
/// @param algorithm Name of the search algorithm to instantiate.
///        NOTE(review): the CLI passes "name", "size", "head" and "hash" by
///        default; the full set of accepted names and the behaviour for an
///        unknown name are defined in Factory.cpp — confirm there.
/// @return Owning pointer to the newly created search object.
std::unique_ptr<DuplicateSearch> createDuplicateSearch(const std::string & algorithm);
}
23 changes: 2 additions & 21 deletions core/include/core/duplicate_search/HashSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include <fstream>
#include <string>

#include "core/HashXxh3.h"
#include "core/hash/HashXxh3.h"
#include "core/file_api/FileApi.h"
#include "core/duplicate_search/IndexedSearch.h"

Expand All @@ -12,19 +12,13 @@ namespace core

class HashSearch : public IndexedSearch<int64_t> {
public:
HashSearch(uint64_t blockSize, uint64_t ignoreSize, std::unique_ptr<FileApi> && fileApi)
HashSearch(uint64_t blockSize, std::unique_ptr<FileApi> && fileApi)
: _blockSize(blockSize)
, _ignoreSize(ignoreSize)
, _fileApi(std::move(fileApi))
{
}

void process(const std::filesystem::path & filePath) override {
if (std::filesystem::file_size(filePath) <= _ignoreSize) {
_ignored.push_back(filePath);
return;
}

HashXxh3 hash;

_fileApi->open(filePath);
Expand All @@ -37,22 +31,9 @@ class HashSearch : public IndexedSearch<int64_t> {
this->add(hash.digest(), filePath);
}

Groups duplicates() const override {
auto duplicates = IndexedSearch<int64_t>::duplicates();
duplicates.push_back(_ignored);
return duplicates;
}

void reset() override {
IndexedSearch<int64_t>::reset();
_ignored.clear();
}

protected:
uint64_t _blockSize = 0;
uint64_t _ignoreSize = 0;
std::unique_ptr<FileApi> _fileApi;
std::vector<std::filesystem::path> _ignored;
};

}
8 changes: 3 additions & 5 deletions core/include/core/duplicate_search/IndexedSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,10 @@ class IndexedSearch : public DuplicateSearch {
public:
Groups duplicates() const override {
Groups result;
std::for_each(std::begin(_duplicatesIndex), std::end(_duplicatesIndex), [&](const auto & pair) {
// result.emplace_back(std::move(pair.second));

for (const auto & pair : _duplicatesIndex) {
result.push_back(pair.second);

});
}

return result;
}
Expand All @@ -31,7 +30,6 @@ class IndexedSearch : public DuplicateSearch {
auto uniqueIt = _uniquesIndex.find(key);

if (std::end(_uniquesIndex) != uniqueIt) {
//use move?
_duplicatesIndex[key].push_back(uniqueIt->second);
_duplicatesIndex[key].push_back(filePath);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

#include <memory>
#include <ostream>
#include <string>
#include <vector>

#include "core/DirectoryGroupScan.h"
#include "core/duplicate_search/DuplicateSearch.h"

namespace core {

class DirectoryScan {
class FileGroupsScan {
public:
DirectoryScan(std::ostream & logStream);
FileGroupsScan(std::ostream & logStream, const std::vector<std::string> & algorithms);

/// @brief Scans the directory at @p directoryPath and returns the resulting file groups.
///
/// @param directoryPath Directory to scan.
/// @return Groups of file paths; the CLI prints each group as a set of duplicates.
///         NOTE(review): how the configured algorithms are applied is implemented
///         in FileGroupsScan.cpp — confirm there.
DuplicateSearch::Groups scan(const std::filesystem::path & directoryPath);

Expand Down
2 changes: 1 addition & 1 deletion core/include/core/file_api/FileStream.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace core {
class FileStream : public FileApi {
public:
void open(const std::filesystem::path & filePath) override {
_stream = std::move(std::ifstream(filePath.c_str(), std::ios::binary));
_stream = std::ifstream(filePath.c_str(), std::ios::binary);

if (!_stream.good()) {
throw std::runtime_error("can't open " + filePath.string());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <memory>
#include <stdexcept>

// #include <xxh3.h>
#include <xxhash.h>

namespace core {
Expand Down Expand Up @@ -32,13 +31,13 @@ class HashXxh3 {
}

if (XXH3_64bits_update(_state.get(), data.data(), data.size()) == XXH_ERROR) {
throw std::runtime_error("xx3 update error");
throw std::runtime_error("xx3 vector update error");
}
}

void update(const std::string & data) {
if (XXH3_64bits_update(_state.get(), data.data(), data.size()) == XXH_ERROR) {
throw std::runtime_error("xx3 update error");
throw std::runtime_error("xx3 string update error");
}
}

Expand Down
Loading

0 comments on commit 69fd19f

Please sign in to comment.