Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zstd compression #324

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,19 @@ else ()
set(_KEYVI_LINK_LIBRARIES_DYNAMIC "${_KEYVI_LINK_LIBRARIES_DYNAMIC} snappy")
endif ()

# Zstd
# REQUIRED makes find_package() stop configuration with a fatal error when
# zstd cannot be located, so no separate not-found branch is needed here.
find_package(ZSTD REQUIRED)
list(APPEND KEYVI_INCLUDES "${ZSTD_INCLUDE_DIRS}")

# Record the library name in the space-separated link list: the static list
# on Darwin, the dynamic list everywhere else.
# The variable name is passed bare so if() dereferences it exactly once.
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
  set(_KEYVI_LINK_LIBRARIES_STATIC "${_KEYVI_LINK_LIBRARIES_STATIC} zstd")
else ()
  set(_KEYVI_LINK_LIBRARIES_DYNAMIC "${_KEYVI_LINK_LIBRARIES_DYNAMIC} zstd")
endif ()

# rapidjson
list(APPEND KEYVI_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/keyvi/3rdparty/rapidjson/include")

Expand All @@ -143,7 +156,7 @@ string(REPLACE " " ";" _KEYVI_COMPILE_DEFINITIONS_LIST "${_KEYVI_COMPILE_DEFINIT

# keyvicompiler
add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp)
target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
target_link_libraries(keyvicompiler ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
target_compile_options(keyvicompiler PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
target_compile_definitions(keyvicompiler PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
target_include_directories(keyvicompiler PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
Expand All @@ -152,7 +165,7 @@ install (TARGETS keyvicompiler DESTINATION bin COMPONENT applications OPTIONAL)

# keyviinspector
add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp)
target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
target_link_libraries(keyviinspector ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
target_compile_options(keyviinspector PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
target_compile_definitions(keyviinspector PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
target_include_directories(keyviinspector PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
Expand All @@ -161,7 +174,7 @@ install (TARGETS keyviinspector DESTINATION bin COMPONENT applications OPTIONAL)

# keyvimerger
add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp)
target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
target_link_libraries(keyvimerger ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
target_compile_options(keyvimerger PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
target_compile_definitions(keyvimerger PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
target_include_directories(keyvimerger PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
Expand All @@ -170,15 +183,15 @@ install (TARGETS keyvimerger DESTINATION bin COMPONENT applications)

# keyvi_c
add_library(keyvi_c SHARED keyvi/bin/keyvi_c/c_api.cpp)
target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
target_link_libraries(keyvi_c ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
target_compile_options(keyvi_c PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
target_compile_definitions(keyvi_c PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
target_include_directories(keyvi_c PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")

# unit tests
FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp)
add_executable(unit_test_all ${UNIT_TEST_SOURCES})
target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
target_link_libraries(unit_test_all ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})
target_compile_options(unit_test_all PRIVATE ${_KEYVI_CXX_FLAGS_LIST})
target_compile_definitions(unit_test_all PRIVATE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
target_include_directories(unit_test_all PRIVATE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
Expand Down Expand Up @@ -224,7 +237,7 @@ add_library(keyvi INTERFACE)

target_include_directories(keyvi INTERFACE "$<BUILD_INTERFACE:${KEYVI_INCLUDES}>")
target_compile_definitions(keyvi INTERFACE ${_KEYVI_COMPILE_DEFINITIONS_LIST})
target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${_OS_LIBRARIES})
target_link_libraries(keyvi INTERFACE ${Boost_LIBRARIES} ${ZLIB_LIBRARIES} ${Snappy_LIBRARY} ${ZSTD_LIBRARIES} ${_OS_LIBRARIES})

### docs

Expand Down
19 changes: 19 additions & 0 deletions cmake_modules/FindZSTD.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Find Zstd, a compression library
#
# Result variables:
#   ZSTD_FOUND        - set by find_package_handle_standard_args when both
#                       the header and the library were located
#   ZSTD_INCLUDE_DIRS - directory containing zstd.h
#   ZSTD_LIBRARIES    - full path of the zstd library
#
# pkg-config is optional: when it is available its answer is used as a search
# hint, otherwise only the default CMake search locations are consulted.
find_package(PkgConfig QUIET)

if (PKG_CONFIG_FOUND)
  pkg_check_modules(ZSTD_PKGCONF QUIET libzstd)
endif ()

# HINTS (not PATHS) so that the directories reported by pkg-config are
# searched before the system default locations instead of after them.
find_path(ZSTD_INCLUDE_DIRS
  NAMES zstd.h
  HINTS ${ZSTD_PKGCONF_INCLUDE_DIRS}
)

find_library(ZSTD_LIBRARIES
  NAMES zstd
  HINTS ${ZSTD_PKGCONF_LIBRARY_DIRS}
)

# Handles REQUIRED / QUIET and sets ZSTD_FOUND.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(ZSTD DEFAULT_MSG ZSTD_INCLUDE_DIRS ZSTD_LIBRARIES)

mark_as_advanced(ZSTD_INCLUDE_DIRS ZSTD_LIBRARIES)
6 changes: 6 additions & 0 deletions keyvi/include/keyvi/compression/compression_selector.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "keyvi/compression/compression_strategy.h"
#include "keyvi/compression/snappy_compression_strategy.h"
#include "keyvi/compression/zlib_compression_strategy.h"
#include "keyvi/compression/zstd_compression_strategy.h"

// #define ENABLE_TRACING
#include "keyvi/dictionary/util/trace.h"
Expand All @@ -51,6 +52,8 @@ inline CompressionStrategy* compression_strategy(const std::string& name = "") {
return new ZlibCompressionStrategy(); // compression level?
} else if (lower_name == "snappy") {
return new SnappyCompressionStrategy();
} else if (lower_name == "zstd") {
return new ZstdCompressionStrategy();
} else if (lower_name == "" || lower_name == "none" || lower_name == "raw") {
return new RawCompressionStrategy();
} else {
Expand All @@ -72,6 +75,9 @@ inline decompress_func_t decompressor_by_code(const std::string& s) {
case SNAPPY_COMPRESSION:
TRACE("unpack snappy compressed string");
return SnappyCompressionStrategy::DoDecompress;
case ZSTD_COMPRESSION:
TRACE("unpack zstd compressed string");
return ZstdCompressionStrategy::DoDecompress;
default:
throw std::invalid_argument("Invalid compression code " +
boost::lexical_cast<std::string>(static_cast<int>(s[0])));
Expand Down
8 changes: 8 additions & 0 deletions keyvi/include/keyvi/compression/compression_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@
#include <string>
#include <vector>

#include "keyvi/dictionary/fsa/internal/constants.h"

namespace keyvi {
namespace compression {

/**
 * Numeric code identifying the compression algorithm of a value.
 *
 * A compression strategy writes its code as the first byte of the buffer it
 * produces, and decompressor_by_code() selects the decompression function
 * from that first byte.
 *
 * NOTE(review): these values presumably end up in persisted dictionary files,
 * so existing numbers should not be reassigned - confirm before changing.
 */
enum CompressionCode {
NO_COMPRESSION = 0,
ZLIB_COMPRESSION = 1,
SNAPPY_COMPRESSION = 2,
ZSTD_COMPRESSION = 3,
};

// buffer type which is realloc-able
Expand Down Expand Up @@ -69,6 +72,9 @@ struct CompressionStrategy {

/** The "name" of the compression strategy. */
virtual std::string name() const = 0;

/** The minimum version this compressor requires */
virtual uint64_t GetFileVersionMin() const = 0;
};

/**
Expand All @@ -95,6 +101,8 @@ struct RawCompressionStrategy final : public CompressionStrategy {
static inline std::string DoDecompress(const std::string& compressed) { return compressed.substr(1); }

std::string name() const { return "raw"; }

uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }
};

} /* namespace compression */
Expand Down
3 changes: 3 additions & 0 deletions keyvi/include/keyvi/compression/snappy_compression_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <string>

#include "keyvi/compression/compression_strategy.h"
#include "keyvi/dictionary/fsa/internal/constants.h"

namespace keyvi {
namespace compression {
Expand Down Expand Up @@ -61,6 +62,8 @@ struct SnappyCompressionStrategy final : public CompressionStrategy {
}

std::string name() const { return "snappy"; }

uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }
};

} /* namespace compression */
Expand Down
3 changes: 3 additions & 0 deletions keyvi/include/keyvi/compression/zlib_compression_strategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <string>

#include "keyvi/compression/compression_strategy.h"
#include "keyvi/dictionary/fsa/internal/constants.h"

// #define ENABLE_TRACING
#include "keyvi/dictionary/util/trace.h"
Expand Down Expand Up @@ -131,6 +132,8 @@ struct ZlibCompressionStrategy final : public CompressionStrategy {

std::string name() const { return "zlib"; }

uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }

private:
z_stream zstream_compress_;
};
Expand Down
87 changes: 87 additions & 0 deletions keyvi/include/keyvi/compression/zstd_compression_strategy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/* * keyvi - A key value store.
*
* Copyright 2015 Hendrik Muhs<[email protected]>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*
* zstd_compression_strategy.h
*
* Created on: September 10, 2016
* Author: Hendrik Muhs<[email protected]>
*/

#ifndef KEYVI_COMPRESSION_ZSTD_COMPRESSION_STRATEGY_H_
#define KEYVI_COMPRESSION_ZSTD_COMPRESSION_STRATEGY_H_

#include <zstd.h>

#include <stdexcept>
#include <string>

#include "keyvi/dictionary/fsa/internal/constants.h"

#ifndef ZSTD_DEFAULT_CLEVEL

/*-===== Pre-defined compression levels =====-*/
#define ZSTD_DEFAULT_CLEVEL 3
#define ZSTD_MAX_CLEVEL 22
#endif

#include "keyvi/compression/compression_strategy.h"

// #define ENABLE_TRACING
#include "keyvi/dictionary/util/trace.h"

namespace keyvi {
namespace compression {

/** A compression strategy that wraps zlib. */
struct ZstdCompressionStrategy final : public CompressionStrategy {
ZstdCompressionStrategy(int compression_level = ZSTD_DEFAULT_CLEVEL) : compression_level_(compression_level) {}

inline void Compress(buffer_t* buffer, const char* raw, size_t raw_size) { DoCompress(buffer, raw, raw_size); }

inline void DoCompress(buffer_t* buffer, const char* raw, size_t raw_size) {
size_t output_length = ZSTD_compressBound(raw_size);
buffer->resize(output_length + 1);
buffer->data()[0] = static_cast<char>(ZSTD_COMPRESSION);

output_length = ZSTD_compress(buffer->data() + 1, output_length, raw, raw_size, compression_level_);
buffer->resize(output_length + 1);
}

inline std::string Decompress(const std::string& compressed) { return DoDecompress(compressed); }

static std::string DoDecompress(const std::string& compressed) {
std::string uncompressed;

size_t dest_size = ZSTD_getFrameContentSize(&compressed.data()[1], compressed.size() - 1);
uncompressed.resize(dest_size);
ZSTD_decompress(&uncompressed[0], dest_size, &compressed.data()[1], compressed.size() - 1);

return uncompressed;
}

std::string name() const { return "zstd"; }

uint64_t GetFileVersionMin() const { return 3; }

private:
int compression_level_;
};

} /* namespace compression */
} /* namespace keyvi */

#endif // KEYVI_COMPRESSION_ZSTD_COMPRESSION_STRATEGY_H_
2 changes: 2 additions & 0 deletions keyvi/include/keyvi/dictionary/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class Dictionary final {

uint64_t GetSize() const { return fsa_->GetNumberOfKeys(); }

uint64_t GetVersion() const { return fsa_->GetVersion(); }

/**
* A simple Contains method to check whether a key is in the dictionary.
*
Expand Down
3 changes: 2 additions & 1 deletion keyvi/include/keyvi/dictionary/dictionary_merger.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,8 @@ class DictionaryMerger final {
}

void CompleteMerge() {
ValueStoreMergeT* value_store = new ValueStoreMergeT(params_);
ValueStoreMergeT* value_store = new ValueStoreMergeT(inputFiles_, params_);

generator_ =
GeneratorAdapter::template CreateGenerator<keyvi::dictionary::fsa::internal::SparseArrayPersistence<uint16_t>>(
GetTotalSparseArraySize(), params_, value_store);
Expand Down
2 changes: 2 additions & 0 deletions keyvi/include/keyvi/dictionary/dictionary_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ class DictionaryProperties {

const std::string& GetSpecializedDictionaryProperties() const { return specialized_dictionary_properties_; }

uint64_t GetVersion() const { return version_; }

std::string GetStatistics() const {
rapidjson::StringBuffer string_buffer;
rapidjson::Writer<rapidjson::StringBuffer> writer(string_buffer);
Expand Down
4 changes: 4 additions & 0 deletions keyvi/include/keyvi/dictionary/fsa/automata.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,10 @@ class Automata final {
return dictionary_properties_->GetManifest();
}

const uint64_t GetVersion() const {
return dictionary_properties_->GetVersion();
}

private:
dictionary_properties_t dictionary_properties_;
std::unique_ptr<internal::IValueStoreReader> value_store_reader_;
Expand Down
10 changes: 6 additions & 4 deletions keyvi/include/keyvi/dictionary/fsa/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,10 +295,12 @@ class Generator final {

stream << KEYVI_FILE_MAGIC;

keyvi::dictionary::DictionaryProperties p(KEYVI_FILE_VERSION_CURRENT, start_state_, number_of_keys_added_,
number_of_states_, value_store_->GetValueStoreType(),
persistence_->GetVersion(), persistence_->GetSize(), manifest_,
specialized_dictionary_properties_);
// value stores can ask for a higher version
const uint64_t file_version = std::max(KEYVI_FILE_VERSION_MIN, value_store_->GetFileVersionMin());

keyvi::dictionary::DictionaryProperties p(file_version, start_state_, number_of_keys_added_, number_of_states_,
value_store_->GetValueStoreType(), persistence_->GetVersion(),
persistence_->GetSize(), manifest_, specialized_dictionary_properties_);
p.WriteAsJsonV2(stream);

// write data from persistence
Expand Down
10 changes: 4 additions & 6 deletions keyvi/include/keyvi/dictionary/fsa/internal/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,10 @@
static const char KEYVI_FILE_MAGIC[] = "KEYVIFSA";
static const size_t KEYVI_FILE_MAGIC_LEN = 8;

// min version of the file
static const int KEYVI_FILE_VERSION_MIN = 2;
// max version of the file we support
static const int KEYVI_FILE_VERSION_MAX = 2;
// the current version of the file format
static const int KEYVI_FILE_VERSION_CURRENT = 2;
// min version of the file format
static const uint64_t KEYVI_FILE_VERSION_MIN = 2;
// max version of the file format supported
static const uint64_t KEYVI_FILE_VERSION_MAX = 3;

// min version of the persistence part
static const int KEYVI_FILE_PERSISTENCE_VERSION_MIN = 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include "keyvi/compression/compression_selector.h"
#include "keyvi/dictionary/dictionary_properties.h"
#include "keyvi/dictionary/fsa/internal/constants.h"
#include "keyvi/dictionary/fsa/internal/ivalue_store.h"
#include "keyvi/dictionary/fsa/internal/lru_generation_cache.h"
#include "keyvi/dictionary/fsa/internal/memory_map_flags.h"
Expand Down Expand Up @@ -63,6 +64,8 @@ class FloatVectorValueStoreBase {

uint32_t GetMergeWeight(uint64_t fsa_value) { return 0; }

uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }

static value_store_t GetValueStoreType() { return value_store_t::FLOAT_VECTOR; }

protected:
Expand Down Expand Up @@ -218,6 +221,8 @@ class FloatVectorValueStoreMergeBase {

uint32_t GetMergeWeight(uint64_t fsa_value) { return 0; }

uint64_t GetFileVersionMin() const { return KEYVI_FILE_VERSION_MIN; }

static value_store_t GetValueStoreType() { return value_store_t::FLOAT_VECTOR; }

protected:
Expand All @@ -229,6 +234,10 @@ class FloatVectorValueStoreMergeBase {
class FloatVectorValueStoreMerge final : public FloatVectorValueStoreMergeBase {
public:
explicit FloatVectorValueStoreMerge(const keyvi::util::parameters_t& parameters = keyvi::util::parameters_t())
: FloatVectorValueStoreMerge({}, parameters) {}

explicit FloatVectorValueStoreMerge(const std::vector<std::string>& inputFiles,
const keyvi::util::parameters_t& parameters = keyvi::util::parameters_t())
: hash_(keyvi::util::mapGetMemory(parameters, MEMORY_LIMIT_KEY, DEFAULT_MEMORY_LIMIT_VALUE_STORE)) {
temporary_directory_ = keyvi::util::mapGetTemporaryPath(parameters);

Expand Down
Loading
Loading