diff --git a/CMakeLists.txt b/CMakeLists.txt index 7258e19..0c20236 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -99,7 +99,6 @@ set(CPACK_ARCHIVE_COMPONENT_INSTALL ON) include(${CMAKE_SOURCE_DIR}/indexing.cmake) include(${CMAKE_SOURCE_DIR}/elasticsearch.cmake) -include(${CMAKE_SOURCE_DIR}/document_type.cmake) list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBDIR}/irods") list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_PLUGINS_DIRECTORY}") @@ -111,7 +110,4 @@ list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INS list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts/irods") list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts/irods/test") -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_SOURCE_DIR}/packaging/postinst;") -set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_SOURCE_DIR}/packaging/postinst") - include(CPack) diff --git a/README.md b/README.md index 5f0e60d..a269f32 100644 --- a/README.md +++ b/README.md @@ -27,31 +27,24 @@ By default, should no resource be tagged it is assumed that all resources are av There are currently three rule engine plugins to configure for the indexing capability which should be added to the `"rule_engines"` section of `/etc/irods/server_config.json`: ``` - "rule_engines": [ - { - "instance_name": "irods_rule_engine_plugin-indexing-instance", - "plugin_name": "irods_rule_engine_plugin-indexing", - "plugin_specific_configuration": { - } - }, - { - "instance_name": "irods_rule_engine_plugin-elasticsearch-instance", - "plugin_name": "irods_rule_engine_plugin-elasticsearch", - "plugin_specific_configuration": { - "hosts" : ["http://localhost:9200/"], - "bulk_count" : 100, - "read_size" : 4194304 - } - }, - { - "instance_name": "irods_rule_engine_plugin-document_type-instance", - "plugin_name": "irods_rule_engine_plugin-document_type", - "plugin_specific_configuration": { - } - }, - ] +"rule_engines": [ + { + "instance_name": "irods_rule_engine_plugin-indexing-instance", + "plugin_name": "irods_rule_engine_plugin-indexing", + "plugin_specific_configuration": {} + }, + { + "instance_name": "irods_rule_engine_plugin-elasticsearch-instance", + "plugin_name": "irods_rule_engine_plugin-elasticsearch", + "plugin_specific_configuration": { + "hosts": ["http://localhost:9200/"], + "bulk_count": 100, + "read_size": 4194304 + } + } +] ``` -The first is the main indexing rule engine plugin, the second is the plugin responsible for implementing the policy for the indexing technology, and the third is responsible for implementing the document type introspection. Currently the default imply returns `text` as the document type. This policy can be overridden to call out to services like Tika for a better introspection of the data. +The first is the main indexing rule engine plugin and the second is the plugin responsible for implementing the policy for the indexing technology. Within each plugin configuration stanza, the "plugin_specific_configuration" object may contain a number of key-value pairs. The following pairs are currently applicable for the purpose of setting the indexing capability's operating parameters: @@ -79,12 +72,6 @@ irods_policy_indexing_metadata_index_ irods_policy_indexing_metadata_purge_ ``` -### Document Type Policy - -``` -irods_policy_indexing_document_type_ -``` - ### Plugin Testing Caveats: diff --git a/configuration.cpp b/configuration.cpp index 7b95bff..14ed126 100644 --- a/configuration.cpp +++ b/configuration.cpp @@ -1,90 +1,103 @@ - #include "configuration.hpp" + #include "plugin_specific_configuration.hpp" -#include + #include #include -namespace irods { - namespace indexing { - configuration::configuration( - const std::string& _instance_name ) : - instance_name_{_instance_name} { - try { - auto cfg = get_plugin_specific_configuration(_instance_name); - auto capture_parameter = [&](const std::string& _param, std::string& _attr) { - if (const auto iter = cfg.find(_param); iter != cfg.end()) { - _attr = iter->get(); - } - }; // capture_parameter +#include + +namespace irods::indexing +{ + configuration::configuration(const std::string& _instance_name) + : instance_name{_instance_name} + { + try { + auto cfg = get_plugin_specific_configuration(_instance_name); + auto capture_parameter = [&](const std::string& _param, std::string& _attr) { + if (const auto iter = cfg.find(_param); iter != cfg.end()) { + _attr = iter->get(); + } + }; // capture_parameter - // integer-or-string parameters + // integer-or-string parameters - using configuration_parameters::load; + using configuration_parameters::load; - job_limit = load(cfg, "job_limit_per_collection_indexing_operation", 1000); - minimum_delay_time = load(cfg, "minimum_delay_time", 1); - maximum_delay_time = load(cfg, "maximum_delay_time", 30); + job_limit = load(cfg, "job_limit_per_collection_indexing_operation", 1000); + minimum_delay_time = load(cfg, "minimum_delay_time", 1); + maximum_delay_time = load(cfg, "maximum_delay_time", 30); - // string parameters + // string parameters - capture_parameter("index", index); - capture_parameter("url_template", urlTemplate); - capture_parameter("delay_parameters", delay_parameters); - capture_parameter("collection_test_flag", collection_test_flag); - } catch ( const exception& _e ) { - THROW( KEY_NOT_FOUND, fmt::format("[{}:{}] - [{}] [error_code=[{}], instance_name=[{}]", - __func__, __LINE__, _e.client_display_what(), _e.code(), _instance_name)); - } catch ( const nlohmann::json::exception& _e ) { - irods::log( LOG_ERROR, - fmt::format("[{}:{}] in [file={}] - json exception occurred [error={}], [instance_name={}]", - __func__,__LINE__,__FILE__, _e.what(), _instance_name)); - THROW( SYS_LIBRARY_ERROR, _e.what() ); - } catch ( const std::exception& _e ) { - THROW( SYS_INTERNAL_ERR, - fmt::format("[{}:{}] in [file={}] - general exception occurred [error={}], [instance_name={}]", - __func__,__LINE__,__FILE__, _e.what(), _instance_name)); - } catch ( ... ) { - THROW( SYS_UNKNOWN_ERROR, - fmt::format( "[{}:{}] in [file={}], [instance_name={}]",__func__,__LINE__,__FILE__,_instance_name)); - } + capture_parameter("index", index); + capture_parameter("delay_parameters", delay_parameters); + capture_parameter("collection_test_flag", collection_test_flag); + } + catch (const exception& _e) { + THROW(KEY_NOT_FOUND, + fmt::format("[{}:{}] - [{}] [error_code=[{}], instance_name=[{}]", + __func__, + __LINE__, + _e.client_display_what(), + _e.code(), + _instance_name)); + } + catch (const nlohmann::json::exception& _e) { + THROW(SYS_LIBRARY_ERROR, + fmt::format("[{}:{}] in [file={}] - json exception occurred [error={}], [instance_name={}]", + __func__, + __LINE__, + __FILE__, + _e.what(), + _instance_name)); + } + catch (const std::exception& _e) { + THROW(SYS_INTERNAL_ERR, + fmt::format("[{}:{}] in [file={}] - general exception occurred [error={}], [instance_name={}]", + __func__, + __LINE__, + __FILE__, + _e.what(), + _instance_name)); + } + catch (...) { + THROW( + SYS_UNKNOWN_ERROR, + fmt::format("[{}:{}] in [file={}], [instance_name={}]", __func__, __LINE__, __FILE__, _instance_name)); + } + } // ctor configuration - } // ctor configuration + namespace policy + { + std::string compose_policy_name(const std::string& _prefix, const std::string& _technology) + { + return fmt::format("{}_{}", _prefix, _technology); + } + } // namespace policy - namespace policy { - std::string compose_policy_name( - const std::string& _prefix, - const std::string& _technology) { - return _prefix+"_"+_technology; - } - } + std::string operation_and_index_types_to_policy_name(const std::string& _operation_type, + const std::string& _index_type) + { + if (operation_type::index == _operation_type) { + if (index_type::full_text == _index_type) { + return policy::object::index; + } - std::string operation_and_index_types_to_policy_name( - const std::string& _operation_type, - const std::string& _index_type) { - if(operation_type::index == _operation_type) { - if(index_type::full_text == _index_type) { - return policy::object::index; - } - else if(index_type::metadata == _index_type) { - return policy::metadata::index; - } - } - else if(operation_type::purge == _operation_type) { - if(index_type::full_text == _index_type) { - return policy::object::purge; - } - else if(index_type::metadata == _index_type) { - return policy::metadata::purge; - } - } // else + if (index_type::metadata == _index_type) { + return policy::metadata::index; + } + } + else if (operation_type::purge == _operation_type) { + if (index_type::full_text == _index_type) { + return policy::object::purge; + } - THROW( - SYS_INVALID_INPUT_PARAM, - boost::format("operation [%s], index [%s]") - % _operation_type - % _index_type); - } // operation_and_index_types_to_policy_name - } // namespace indexing -} // namepsace irods + if (index_type::metadata == _index_type) { + return policy::metadata::purge; + } + } + THROW(SYS_INVALID_INPUT_PARAM, fmt::format("operation [{}], index [{}]", _operation_type, _index_type)); + } // operation_and_index_types_to_policy_name +} // namespace irods::indexing diff --git a/configuration.hpp b/configuration.hpp index b1dd245..171e309 100644 --- a/configuration.hpp +++ b/configuration.hpp @@ -1,234 +1,250 @@ -#ifndef CONFIGURATION_HPP -#define CONFIGURATION_HPP +#ifndef IRODS_CAPABILITY_INDEXING_CONFIGURATION_HPP +#define IRODS_CAPABILITY_INDEXING_CONFIGURATION_HPP -#include -#include #include -#include -#include -#include +#include + #include -#include #include +#include + +#include +#include + +#include +#include -namespace irods { - namespace indexing { - - namespace configuration_parameters { - - const auto VERBOSE_CONFIGURATION_LOADING = false; - - // Specializations of map_traits handle loading of configuration from: - // - // * unordered_map (iRODS <= 4.2) - // * nlohmann::json (iRODS >= 4.3) - - template struct map_traits {}; - - template<> struct map_traits { - using value_type = nlohmann::json; - using key_absent = nlohmann::json::out_of_range; - }; - - template struct map_traits> { - using value_type = V; - using key_absent = std::out_of_range; - }; - - // For internal handling of absent keys. - - struct no_such_key: std::runtime_error { - explicit no_such_key(const std::string &key) - : std::runtime_error {std::string{"No such key: "} + key} - { } - }; - - // Load a value from a configuration map by key name. - - template - auto value_slot_by_name (const M& m, - const std::string& name) -> const typename map_traits::value_type & - { - try { - return m.at(name); - } - catch(const typename map_traits::key_absent&) { - throw no_such_key { name }; - } - } - - // Helper class to get a value of the expected type T. - // T will most likely be a std::string, int, double, or bool. - - template - class getter { - - public: - - using json = nlohmann::json; - - private: - - // Helper methods for the two map types we support: - - template - bool impl_(const json& j, boost::optional& u) { - try{ u = j.get();} - catch(const json::type_error&) { - return {}; - } - return true; - } - - template - bool impl_(const boost::any& a, boost::optional& u) { - try { u=boost::any_cast(a); - } - catch(const boost::bad_any_cast&) { - return {}; - } - return true; - } - - public: - - // The interface: 'get' - // - tries to retrieve a stored value: First by the expected type T, - // or (failing that) from a string in the config. - // - returns true if the holder value contains a value successfully - // so loaded from the configuration. - - template - bool get(const M& m, boost::optional& t) { - if (impl_(m,t)) { return true; } - boost::optional text; - if (impl_(m,text)) { - t = boost::lexical_cast(*text); - return true; - } - return false; - } - }; - - // High level configuration value loader: - // =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= - // Inputs: - // * map : the iRODS 4.2/4.3 map object or subobject capable of being indexed by string - // * key : the name of the configuration variable to be loaded - // * (optional) default_v : the value to be returned if the value cannot be found or translated properly - // * (If omitted, the default constructed value for the type T is used.) - // - // Returns: the desired configuration variable of the type T - - template - const T load (const Map& map, const char* key, const T& default_v = T{}) - { - using namespace std; - boost::optional retval; - std::string error_msg = ""; - try { - auto& v = value_slot_by_name(map,key); - if (!getter{}.get(v,retval)) { - if (VERBOSE_CONFIGURATION_LOADING) { - irods::log( LOG_ERROR, fmt::format("type or format error loading key {}",key)); - } - } - } - catch (const no_such_key& e) { // key not found; fall through to rely on the default value - } - catch (std::exception& e) { - error_msg = fmt::format("Exception - {}",e.what()); - } - catch (...) { - error_msg = "Unknown error"; - } - - if (error_msg.size()) { irods::log( LOG_ERROR, error_msg); } - if (!retval) { - if (VERBOSE_CONFIGURATION_LOADING) { - irods::log(LOG_ERROR, fmt::format("Using default value of {} = {}", key, default_v)); - } - return default_v; - } - return *retval; - } - - } // namespace configuration_variable - - - - namespace policy { - // Policy Naming Examples - // irods_policy____ - // irods_policy_indexing_object_index_ - // irods_policy_indexing_collection_index_ - // irods_policy_indexing_metadata_index_ - // irods_policy_indexing_object_purge_ - // irods_policy_indexing_collection_purge_ - // irods_policy_indexing_metadata_purge_ - - static constexpr auto prefix = "irods_policy_indexing"; - std::string compose_policy_name( - const std::string& _prefix, - const std::string& _technology); - - - namespace object { - static const std::string index{"irods_policy_indexing_object_index"}; - static const std::string purge{"irods_policy_indexing_object_purge"}; - } // object - - namespace metadata { - static const std::string index{"irods_policy_indexing_metadata_index"}; - static const std::string purge{"irods_policy_indexing_metadata_purge"}; - } // metadata - - namespace collection { - static const std::string index{"irods_policy_indexing_collection_index"}; - static const std::string purge{"irods_policy_indexing_collection_purge"}; - } // collection - - } // policy - - std::string operation_and_index_types_to_policy_name( - const std::string& _operation_type, - const std::string& _index_type); - - namespace schedule { - static const std::string object{"irods_policy_schedule_object_indexing"}; - static const std::string collection{"irods_policy_schedule_collection_indexing"}; - } - - namespace index_type { - static const std::string full_text{"full_text"}; - static const std::string metadata{"metadata"}; - } - - namespace operation_type { - static const std::string index{"index"}; - static const std::string purge{"purge"}; - } - - struct configuration { - // metadata attributes - std::string index{"irods::indexing::index"}; - std::string flag{"irods::indexing::flag"}; - - // basic configuration - int minimum_delay_time{1}; - int maximum_delay_time{30}; - int job_limit {}; - std::string delay_parameters{"60s DOUBLE UNTIL SUCCESS OR 5 TIMES"}; - std::string urlTemplate{"http:/{}"}; // Clients should aim not to use this index-embedded URL. It is no longer used by - // MetaLnx as of version 2.5.0, nor by the search plugin extension. - // Thus, it is a possible target for deprecation. - int log_level{LOG_DEBUG}; - std::string collection_test_flag {""}; - - const std::string instance_name_{}; - explicit configuration(const std::string& _instance_name); - }; // struct configuration - } // namespace indexing -} // namespace irods - -#endif // STORAGE_TIERING_CONFIGURATION_HPP +namespace irods::indexing +{ + namespace configuration_parameters + { + const auto VERBOSE_CONFIGURATION_LOADING = false; + + // Specializations of map_traits handle loading of configuration from: + // + // * unordered_map (iRODS <= 4.2) + // * nlohmann::json (iRODS >= 4.3) + + template + struct map_traits + { + }; + + template <> + struct map_traits + { + using value_type = nlohmann::json; + using key_absent = nlohmann::json::out_of_range; + }; + + template + struct map_traits> + { + using value_type = V; + using key_absent = std::out_of_range; + }; + + // For internal handling of absent keys. + + struct no_such_key : std::runtime_error + { + explicit no_such_key(const std::string& key) + : std::runtime_error{std::string{"No such key: "} + key} + { + } + }; + + // Load a value from a configuration map by key name. + + template + auto value_slot_by_name(const M& m, const std::string& name) -> const typename map_traits::value_type& + { + try { + return m.at(name); + } + catch (const typename map_traits::key_absent&) { + throw no_such_key{name}; + } + } + + // Helper class to get a value of the expected type T. + // T will most likely be a std::string, int, double, or bool. + + template + class getter + { + public: + using json = nlohmann::json; + + private: + // Helper methods for the two map types we support: + + template + bool impl_(const json& j, boost::optional& u) + { + try { + u = j.get(); + } + catch (const json::type_error&) { + return {}; + } + return true; + } + + template + bool impl_(const boost::any& a, boost::optional& u) + { + try { + u = boost::any_cast(a); + } + catch (const boost::bad_any_cast&) { + return {}; + } + return true; + } + + public: + // The interface: 'get' + // - tries to retrieve a stored value: First by the expected type T, + // or (failing that) from a string in the config. + // - returns true if the holder value contains a value successfully + // so loaded from the configuration. + + template + bool get(const M& m, boost::optional& t) + { + if (impl_(m, t)) { + return true; + } + boost::optional text; + if (impl_(m, text)) { + t = boost::lexical_cast(*text); + return true; + } + return false; + } + }; + + // High level configuration value loader: + // =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + // Inputs: + // * map : the iRODS 4.2/4.3 map object or subobject capable of being indexed by string + // * key : the name of the configuration variable to be loaded + // * (optional) default_v : the value to be returned if the value cannot be found or translated properly + // * (If omitted, the default constructed value for the type T is used.) + // + // Returns: the desired configuration variable of the type T + + template + const T load(const Map& map, const char* key, const T& default_v = T{}) + { + using namespace std; + boost::optional retval; + std::string error_msg = ""; + try { + auto& v = value_slot_by_name(map, key); + if (!getter{}.get(v, retval)) { + if (VERBOSE_CONFIGURATION_LOADING) { + irods::log(LOG_ERROR, fmt::format("type or format error loading key {}", key)); + } + } + } + catch (const no_such_key& e) { // key not found; fall through to rely on the default value + } + catch (std::exception& e) { + error_msg = fmt::format("Exception - {}", e.what()); + } + catch (...) { + error_msg = "Unknown error"; + } + + if (error_msg.size()) { + irods::log(LOG_ERROR, error_msg); + } + if (!retval) { + if (VERBOSE_CONFIGURATION_LOADING) { + irods::log(LOG_ERROR, fmt::format("Using default value of {} = {}", key, default_v)); + } + return default_v; + } + return *retval; + } + + } //namespace configuration_parameters + + namespace policy + { + // Policy Naming Examples + // irods_policy____ + // irods_policy_indexing_object_index_ + // irods_policy_indexing_collection_index_ + // irods_policy_indexing_metadata_index_ + // irods_policy_indexing_object_purge_ + // irods_policy_indexing_collection_purge_ + // irods_policy_indexing_metadata_purge_ + + static constexpr auto prefix = "irods_policy_indexing"; + std::string compose_policy_name(const std::string& _prefix, const std::string& _technology); + + namespace object + { + static const std::string index{"irods_policy_indexing_object_index"}; + static const std::string purge{"irods_policy_indexing_object_purge"}; + } // namespace object + + namespace metadata + { + static const std::string index{"irods_policy_indexing_metadata_index"}; + static const std::string purge{"irods_policy_indexing_metadata_purge"}; + } // namespace metadata + + namespace collection + { + static const std::string index{"irods_policy_indexing_collection_index"}; + static const std::string purge{"irods_policy_indexing_collection_purge"}; + } // namespace collection + } // namespace policy + + std::string operation_and_index_types_to_policy_name(const std::string& _operation_type, + const std::string& _index_type); + + namespace schedule + { + static const std::string object{"irods_policy_schedule_object_indexing"}; + static const std::string collection{"irods_policy_schedule_collection_indexing"}; + } // namespace schedule + + namespace index_type + { + static const std::string full_text{"full_text"}; + static const std::string metadata{"metadata"}; + } // namespace index_type + + namespace operation_type + { + static const std::string index{"index"}; + static const std::string purge{"purge"}; + } // namespace operation_type + + struct configuration + { + // metadata attributes + std::string index{"irods::indexing::index"}; + std::string flag{"irods::indexing::flag"}; + + // basic configuration + int minimum_delay_time{1}; + int maximum_delay_time{30}; + int job_limit{}; + std::string delay_parameters{"60s DOUBLE UNTIL SUCCESS OR 5 TIMES"}; + + int log_level{LOG_DEBUG}; + std::string collection_test_flag; + + const std::string instance_name; + + explicit configuration(const std::string& _instance_name); + }; // struct configuration +} // namespace irods::indexing + +#endif // IRODS_CAPABILITY_INDEXING_CONFIGURATION_HPP diff --git a/cpp_json_kw.hpp b/cpp_json_kw.hpp index 7bff749..8b8693f 100644 --- a/cpp_json_kw.hpp +++ b/cpp_json_kw.hpp @@ -1,34 +1,39 @@ -#ifndef CPP_JSON_KW__HPP -#define CPP_JSON_KW__HPP +#ifndef IRODS_CAPABILITY_INDEXING_CPP_JSON_KW_HPP +#define IRODS_CAPABILITY_INDEXING_CPP_JSON_KW_HPP + +#include -#include -#include #include -#include -using nlohmann::json; +#include +#include +#include template -struct mapped_json_value {bool success ; std::optional value;}; +struct mapped_json_value +{ + bool success; + std::optional value; +}; // Extract a value by T (the type) and key (a string lookup key from a JSON object) // The returned values are, in order: -// - a boolean (success flag) +// - a boolean (success flag) // - a std::optional containing the retrieved value on success. template -auto kws_get(const nlohmann::json &j, const std::string & key) -> mapped_json_value +auto kws_get(const nlohmann::json& j, const std::string& key) -> mapped_json_value { - if (auto iter = j.find(key); iter != j.end()) { - try { - return { true, iter->get() }; - } - catch (std::exception & e) { - std::cerr << "bad conversion: " << e.what() << std::endl; - throw; - } - } - return {}; + if (auto iter = j.find(key); iter != j.end()) { + try { + return {true, iter->get()}; + } + catch (std::exception& e) { + rodsLog(LOG_ERROR, "%s: bad conversion: %s", __func__, e.what()); + throw; + } + } + return {}; } /* // SAMPLE USAGE @@ -54,4 +59,4 @@ int main (int argc, char** argv) * */ -#endif // CPP_JSON_KW__HPP +#endif // IRODS_CAPABILITY_INDEXING_CPP_JSON_KW_HPP diff --git a/document_type.cmake b/document_type.cmake deleted file mode 100644 index a2f2c3c..0000000 --- a/document_type.cmake +++ /dev/null @@ -1,79 +0,0 @@ -set(POLICY_NAME "document_type") - -string(REPLACE "_" "-" POLICY_NAME_HYPHENS ${POLICY_NAME}) -set(IRODS_PACKAGE_COMPONENT_POLICY_NAME "${POLICY_NAME_HYPHENS}") -string(TOUPPER ${IRODS_PACKAGE_COMPONENT_POLICY_NAME} IRODS_PACKAGE_COMPONENT_POLICY_NAME_UPPERCASE) - -set(TARGET_NAME "${IRODS_TARGET_NAME_PREFIX}-${POLICY_NAME}") -string(REPLACE "_" "-" TARGET_NAME_HYPHENS ${TARGET_NAME}) - -set( - IRODS_PLUGIN_POLICY_COMPILE_DEFINITIONS - RODS_SERVER - ENABLE_RE - ) - -set( - IRODS_PLUGIN_POLICY_LINK_LIBRARIES - irods_server - ) - -add_library( - ${TARGET_NAME} - MODULE - ${CMAKE_SOURCE_DIR}/lib${TARGET_NAME}.cpp - ${CMAKE_SOURCE_DIR}/utilities.cpp - ${CMAKE_SOURCE_DIR}/configuration.cpp - ${CMAKE_SOURCE_DIR}/plugin_specific_configuration.cpp - ) - -target_include_directories( - ${TARGET_NAME} - PRIVATE - ${IRODS_INCLUDE_DIRS} - ${IRODS_EXTERNALS_FULLPATH_BOOST}/include - ${IRODS_EXTERNALS_FULLPATH_FMT}/include - ${IRODS_EXTERNALS_FULLPATH_JANSSON}/include - ${IRODS_EXTERNALS_FULLPATH_FMT}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include - ) - -target_link_libraries( - ${TARGET_NAME} - PRIVATE - ${IRODS_PLUGIN_POLICY_LINK_LIBRARIES} - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_filesystem.so - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_regex.so - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_system.so - ${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so - irods_common - nlohmann_json::nlohmann_json - ) - -target_compile_definitions(${TARGET_NAME} PRIVATE ${IRODS_PLUGIN_POLICY_COMPILE_DEFINITIONS} ${IRODS_COMPILE_DEFINITIONS} ${IRODS_COMPILE_DEFINITIONS_PRIVATE} BOOST_SYSTEM_NO_DEPRECATED) -target_compile_options(${TARGET_NAME} PRIVATE -Wno-write-strings) -set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD ${IRODS_CXX_STANDARD}) - -install( - TARGETS - ${TARGET_NAME} - LIBRARY - DESTINATION ${IRODS_PLUGINS_DIRECTORY}/rule_engines - COMPONENT ${IRODS_PACKAGE_COMPONENT_POLICY_NAME} - ) - - -set(CPACK_DEBIAN_${IRODS_PACKAGE_COMPONENT_POLICY_NAME_UPPERCASE}_PACKAGE_NAME ${TARGET_NAME_HYPHENS}) -set(CPACK_PACKAGE_VERSION ${IRODS_PLUGIN_VERSION}) -set(CPACK_DEBIAN_${IRODS_PACKAGE_COMPONENT_POLICY_NAME_UPPERCASE}_PACKAGE_DEPENDS "${IRODS_PACKAGE_DEPENDENCIES_STRING}, irods-server (= ${IRODS_VERSION}), irods-runtime (= ${IRODS_VERSION}), libc6") - -set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME_UPPERCASE}_PACKAGE_NAME ${TARGET_NAME_HYPHENS}) - -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_SOURCE_DIR}/packaging/${POLICY_NAME}/postinst;") -set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_SOURCE_DIR}/packaging/${POLICY_NAME}/postinst") - -if (IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "centos" OR IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "centos linux" OR IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "almalinux" OR IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "rocky") - set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME}_PACKAGE_REQUIRES "${IRODS_PACKAGE_DEPENDENCIES_STRING}, irods-server = ${IRODS_VERSION}, irods-runtime = ${IRODS_VERSION}, openssl") -elseif (IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "opensuse") - set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME}_PACKAGE_REQUIRES "${IRODS_PACKAGE_DEPENDENCIES_STRING}, irods-server = ${IRODS_VERSION}, irods-runtime = ${IRODS_VERSION}, libopenssl1_0_0") -endif() diff --git a/elasticsearch.cmake b/elasticsearch.cmake index 6411690..80e01b6 100644 --- a/elasticsearch.cmake +++ b/elasticsearch.cmake @@ -9,8 +9,6 @@ string(REPLACE "_" "-" TARGET_NAME_HYPHENS ${TARGET_NAME}) include(IrodsExternals) -IRODS_MACRO_CHECK_DEPENDENCY_SET_FULLPATH_ADD_TO_IRODS_PACKAGE_DEPENDENCIES_LIST(ELASTICCLIENT elasticlientd68e30e3-0) - string(REPLACE ";" ", " ${TARGET_NAME}_PACKAGE_DEPENDENCIES_STRING "${IRODS_PACKAGE_DEPENDENCIES_LIST}") unset(IRODS_PACKAGE_DEPENDENCIES_LIST) @@ -18,50 +16,52 @@ set( IRODS_PLUGIN_POLICY_COMPILE_DEFINITIONS RODS_SERVER ENABLE_RE - ) +) set( IRODS_PLUGIN_POLICY_LINK_LIBRARIES irods_server - ) +) add_library( - ${TARGET_NAME} - MODULE - ${CMAKE_SOURCE_DIR}/lib${TARGET_NAME}.cpp - ${CMAKE_SOURCE_DIR}/utilities.cpp - ${CMAKE_SOURCE_DIR}/configuration.cpp - ${CMAKE_SOURCE_DIR}/plugin_specific_configuration.cpp - ) + ${TARGET_NAME} + MODULE + ${CMAKE_SOURCE_DIR}/lib${TARGET_NAME}.cpp + ${CMAKE_SOURCE_DIR}/utilities.cpp + ${CMAKE_SOURCE_DIR}/configuration.cpp + ${CMAKE_SOURCE_DIR}/plugin_specific_configuration.cpp +) target_include_directories( - ${TARGET_NAME} - PRIVATE - ${IRODS_INCLUDE_DIRS} - ${IRODS_EXTERNALS_FULLPATH_BOOST}/include - ${IRODS_EXTERNALS_FULLPATH_FMT}/include - ${IRODS_EXTERNALS_FULLPATH_JANSSON}/include - ${IRODS_EXTERNALS_FULLPATH_FMT}/include - ${CMAKE_CURRENT_SOURCE_DIR}/include - ${IRODS_EXTERNALS_FULLPATH_ELASTICCLIENT}/include/ - ) + ${TARGET_NAME} + PRIVATE + ${IRODS_INCLUDE_DIRS} + ${IRODS_EXTERNALS_FULLPATH_BOOST}/include + ${IRODS_EXTERNALS_FULLPATH_FMT}/include +) target_link_libraries( - ${TARGET_NAME} - PRIVATE - ${IRODS_PLUGIN_POLICY_LINK_LIBRARIES} - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_filesystem.so - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_regex.so - ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_system.so - ${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so - ${IRODS_EXTERNALS_FULLPATH_ELASTICCLIENT}/lib/libelasticlient.so - ${IRODS_EXTERNALS_FULLPATH_ELASTICCLIENT}/lib/libjsoncpp.so - ${IRODS_EXTERNALS_FULLPATH_ELASTICCLIENT}/lib/libcpr.so - irods_common - nlohmann_json::nlohmann_json - ) - -target_compile_definitions(${TARGET_NAME} PRIVATE ${IRODS_PLUGIN_POLICY_COMPILE_DEFINITIONS} ${IRODS_COMPILE_DEFINITIONS} ${IRODS_COMPILE_DEFINITIONS_PRIVATE} BOOST_SYSTEM_NO_DEPRECATED) + ${TARGET_NAME} + PRIVATE + ${IRODS_PLUGIN_POLICY_LINK_LIBRARIES} + ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_filesystem.so + ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_regex.so + ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_system.so + ${IRODS_EXTERNALS_FULLPATH_BOOST}/lib/libboost_url.so + ${IRODS_EXTERNALS_FULLPATH_FMT}/lib/libfmt.so + irods_common + nlohmann_json::nlohmann_json +) + +target_compile_definitions( + ${TARGET_NAME} + PRIVATE + ${IRODS_PLUGIN_POLICY_COMPILE_DEFINITIONS} + ${IRODS_COMPILE_DEFINITIONS} + ${IRODS_COMPILE_DEFINITIONS_PRIVATE} + BOOST_SYSTEM_NO_DEPRECATED + IRODS_PLUGIN_VERSION="${IRODS_PLUGIN_VERSION}" +) target_compile_options(${TARGET_NAME} PRIVATE -Wno-write-strings) set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD ${IRODS_CXX_STANDARD}) @@ -71,7 +71,7 @@ install( LIBRARY DESTINATION ${IRODS_PLUGINS_DIRECTORY}/rule_engines COMPONENT ${IRODS_PACKAGE_COMPONENT_POLICY_NAME} - ) +) set(CPACK_PACKAGE_VERSION ${IRODS_PLUGIN_VERSION}) set(CPACK_DEBIAN_${IRODS_PACKAGE_COMPONENT_POLICY_NAME_UPPERCASE}_PACKAGE_NAME ${TARGET_NAME_HYPHENS}) @@ -84,8 +84,7 @@ set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_SOURCE_DIR}/packaging/${POLICY_N set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_SOURCE_DIR}/packaging/${POLICY_NAME}/postinst") if (IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "centos" OR IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "centos linux" OR IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "almalinux" OR IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "rocky") - set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME}_PACKAGE_REQUIRES "${IRODS_PACKAGE_DEPENDENCIES_STRING}, ${${TARGET_NAME}_PACKAGE_DEPENDENCIES_STRING}, irods-server = ${IRODS_VERSION}, irods-runtime = ${IRODS_VERSION}, openssl") + set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME}_PACKAGE_REQUIRES "${IRODS_PACKAGE_DEPENDENCIES_STRING}, ${${TARGET_NAME}_PACKAGE_DEPENDENCIES_STRING}, irods-server = ${IRODS_VERSION}, irods-runtime = ${IRODS_VERSION}, openssl") elseif (IRODS_LINUX_DISTRIBUTION_NAME STREQUAL "opensuse") - set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME}_PACKAGE_REQUIRES "${IRODS_PACKAGE_DEPENDENCIES_STRING}, irods-server = ${IRODS_VERSION}, irods-runtime = ${IRODS_VERSION}, libopenssl1_0_0") + set(CPACK_RPM_${IRODS_PACKAGE_COMPONENT_POLICY_NAME}_PACKAGE_REQUIRES "${IRODS_PACKAGE_DEPENDENCIES_STRING}, irods-server = ${IRODS_VERSION}, irods-runtime = ${IRODS_VERSION}, libopenssl1_0_0") endif() - diff --git a/es_mapping.json b/es_mapping.json index b13ffeb..e2e973a 100644 --- a/es_mapping.json +++ b/es_mapping.json @@ -1,45 +1,48 @@ -{"properties": { - "url": { - "type": "text" - }, - "zoneName": { - "type": "keyword" - }, - "absolutePath": { - "type": "keyword" - }, - "fileName": { - "type": "text" - }, - "parentPath": { - "type": "text" - }, - "isFile": { - "type": "boolean" - }, - "dataSize": { - "type": "long" - }, - "mimeType": { - "type": "keyword" - }, - "lastModifiedDate": { - "type": "date", - "format": "epoch_second" - }, - "metadataEntries": { - "type": "nested", - "properties": { - "attribute": { - "type": "keyword" - }, - "value": { - "type": "text" - }, - "unit": { - "type": "keyword" - } - } - } - } - } +{ + "mappings": { + "properties": { + "url": { + "type": "text" + }, + "zoneName": { + "type": "keyword" + }, + "absolutePath": { + "type": "keyword" + }, + "fileName": { + "type": "text" + }, + "parentPath": { + "type": "text" + }, + "isFile": { + "type": "boolean" + }, + "dataSize": { + "type": "long" + }, + "mimeType": { + "type": "keyword" + }, + "lastModifiedDate": { + "type": "date", + "format": "epoch_second" + }, + "metadataEntries": { + "type": "nested", + "properties": { + "attribute": { + "type": "keyword" + }, + "value": { + "type": "text" + }, + "unit": { + "type": "keyword" + } + } + } + } + } +} diff --git a/indexing_utilities.cpp b/indexing_utilities.cpp index f2ff131..ae77d1a 100644 --- a/indexing_utilities.cpp +++ b/indexing_utilities.cpp @@ -1,679 +1,635 @@ - -#include -#include -#include "utilities.hpp" #include "indexing_utilities.hpp" -#define IRODS_METADATA_ENABLE_SERVER_SIDE_API -#define IRODS_QUERY_ENABLE_SERVER_SIDE_API +#include "cpp_json_kw.hpp" +#include "utilities.hpp" -#include #include - -#include +#include +#include #include - +#include #include +#include #include #include -#include -#include -#define IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API +#ifndef IRODS_METADATA_ENABLE_SERVER_SIDE_API +# define IRODS_METADATA_ENABLE_SERVER_SIDE_API +#endif +#include + +#ifndef IRODS_QUERY_ENABLE_SERVER_SIDE_API +# define IRODS_QUERY_ENABLE_SERVER_SIDE_API +#endif +#include + +#ifndef IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API +# define IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API +#endif #include +#include #include -#include #include -#include #include #include -#include -#include -#include -#include +#include #include #include -#include "cpp_json_kw.hpp" +#include -using namespace std::string_literals; +#include +#include +#include -int _delayExec( - const char *inActionCall, - const char *recoveryActionCall, - const char *delayCondition, - ruleExecInfo_t *rei ); - -namespace irods { - namespace indexing { - indexer::indexer( - ruleExecInfo_t* _rei, - const std::string& _instance_name) : - rei_(_rei) - , comm_(_rei->rsComm) - , config_(_instance_name) { - } // indexer - - // -=-=-=-= Launch a new delayed task to execute index or purge operation - // - - void indexer::schedule_indexing_policy( - const std::string& _json, - const std::string& _params) { - const int delay_err = _delayExec( - _json.c_str(), - "", - _params.c_str(), - rei_); - if(delay_err < 0) { - THROW( - delay_err, - "delayExec failed"); - } - } // schedule_indexing_policy - - // -=-=-=-= Does the given AVU exist on the collection of the given name? - // - - bool indexer::metadata_exists_on_collection( - const std::string& _collection_name, - const std::string& _attribute, - const std::string& _value, - const std::string& _units ) { - try { - std::string query_str { - boost::str( - boost::format("SELECT META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS WHERE META_COLL_ATTR_NAME = '%s' " - "and COLL_NAME = '%s'") % _attribute % _collection_name) }; - query qobj{rei_->rsComm, query_str, 1}; - - if(qobj.size() == 0) { - return false; - } - - for(auto results : qobj) { - if(results[0] == _value && - results[1] == _units) { - return true; - } - } - - return false; - - } - catch( irods::exception& _e) { - return false; - } - } // metadata_exists_on_collection - - void indexer::schedule_collection_operation( - const std::string& _operation_type, - const std::string& _collection_name, - const std::string& _user_name, - const std::string& _indexer_string, - const std::string& _indexer) { - - rodsLog( - config_.log_level, - "irods::indexing::collection indexing collection [%s] with [%s] type [%s]", - _collection_name.c_str(), - _indexer_string.c_str(), - _indexer.c_str()); - - std::string index_name, index_type; - std::tie(index_name, index_type) = parse_indexer_string(_indexer_string); - const auto policy_name = _operation_type == irods::indexing::operation_type::index ? - irods::indexing::policy::collection::index : - irods::indexing::policy::collection::purge; - using json = nlohmann::json; - json rule_obj; - rule_obj["rule-engine-operation"] = policy_name; - rule_obj["rule-engine-instance-name"] = config_.instance_name_; - rule_obj["collection-name"] = _collection_name; - rule_obj["user-name"] = _user_name; - rule_obj["indexer"] = _indexer; - rule_obj["index-name"] = index_name; - rule_obj["index-type"] = index_type; - - const auto delay_err = _delayExec( - rule_obj.dump().c_str(), - "", - generate_delay_execution_parameters().c_str(), - rei_); - if(delay_err < 0) { - THROW( - delay_err, - boost::format("queue collection indexing failed for [%s] indexer [%s] type [%s]") % - _collection_name % - _indexer % - index_type); - } - - rodsLog( - config_.log_level, - "irods::indexing::collection indexing collection [%s] with [%s] type [%s]", - _collection_name.c_str(), - _indexer.c_str(), - index_type.c_str()); - } // schedule_collection_operation - - std::vector indexer::get_indexing_resource_names() { - std::string query_str { - boost::str( - boost::format("SELECT RESC_NAME WHERE META_RESC_ATTR_NAME = '%s' AND META_RESC_ATTR_VALUE = 'true'") - % config_.index)}; - - query qobj{comm_, query_str}; - std::vector ret_val; - for(const auto& row : qobj) { - ret_val.push_back(row[0]); - } - - return ret_val; - - } // get_indexing_resource_names - - std::string indexer::get_indexing_resource_name_for_object( - const std::string _object_path, - std::vector _resource_names) { - boost::filesystem::path p{_object_path}; - std::string coll_name = p.parent_path().string(); - std::string data_name = p.filename().string(); - - std::string query_str { - boost::str( - boost::format("SELECT RESC_NAME WHERE DATA_NAME = '%s' AND COLL_NAME = '%s'") % - data_name % - coll_name) }; - query qobj{comm_, query_str, 1}; - if(qobj.size() == 0) { - THROW( - CAT_NO_ROWS_FOUND, - boost::format("no resource names found for object [%s]") - % _object_path); - } - - if(_resource_names.empty()) { - return qobj.front()[0]; - } - - for(const auto& resource_name_for_object : qobj) { - for( const auto& resource_name_for_indexing : _resource_names) { - if(resource_name_for_object[0] == resource_name_for_indexing) { - return resource_name_for_object[0]; - } - } - } - - THROW( - SYS_INVALID_INPUT_PARAM, - boost::format("failed to find indexing resource for object [%s]") - % _object_path); - - } // get_indexing_resource_name_for_object - - bool indexer::resource_is_indexable( - const std::string _source_resource, - std::vector _resource_names) { - if(_source_resource == EMPTY_RESOURCE_NAME || - _resource_names.empty()) { - return true; - } - - for( const auto& resource_name_for_indexing : _resource_names) { - if(_source_resource == resource_name_for_indexing) { - return true; - } - } - - return false; - } // resource_is_indexable - - void indexer::schedule_metadata_purge_for_recursive_rm_object( const std::string& logical_path, - const nlohmann::json & recurse_info ) { - schedule_policy_event_for_object( - /*policy_name, */ "irods_policy_recursive_rm_object_by_path", - logical_path, - /*_user_name,*/ "", - EMPTY_RESOURCE_NAME, - /* _indexer, */ "elasticsearch", - recurse_info.dump(), - /* _index_type, */ "metadata", - generate_delay_execution_parameters()); - } - - // - Starting at _collection_name , recurse over every sub-element of the tree - // - (including data objects and collections and starting with the root). - // - Call schedule_policy_event_for_object for every object or collection - // - - void indexer::schedule_policy_events_for_collection( - const std::string& _operation_type, - const std::string& _collection_name, - const std::string& _user_name, - const std::string& _indexer, - const std::string& _index_name, - const std::string& _index_type) { - namespace fs = irods::experimental::filesystem; - namespace fsvr = irods::experimental::filesystem::server; - using fsp = fs::path; - rsComm_t& comm = *rei_->rsComm; - - const auto indexing_resources = get_indexing_resource_names(); - const auto policy_name = operation_and_index_types_to_policy_name( - _operation_type, - _index_type); - fsp start_path{_collection_name}; - - int job_limit = config_.job_limit; - int job_max = std::numeric_limits::max(); - - if (job_limit <= 0) { - job_limit = job_max; // 0 is the advertised default - if (job_limit < 0) { // negative value could result from a value > INT_MAX - irods::log(LOG_WARNING, - fmt::format( R"Qu(Parameter "job_limit_per_collection_indexing_operation" is )Qu" - "too large or negative, clipped to: {}", job_max)); - } - } - - auto iter_end = fsvr::recursive_collection_iterator{}; - auto iter = fsvr::recursive_collection_iterator{comm, start_path}; - - const std::hash string_hasher; - const auto key1 = string_hasher( _collection_name ); - const auto key2 = string_hasher( config_.instance_name_ ); - - const std::string unique_key = std::to_string( key1 ) + "-" + std::to_string( key2 ); - - static const auto double_quote = R"(")"s; - const auto JOB_QUERY_STRING = fmt::format (R"(SELECT count(RULE_EXEC_ID) WHERE RULE_EXEC_NAME like '%"{}"%')", unique_key); - const auto LOW_WATER_MARK {job_limit / 2}; - auto n_jobs {job_limit * 0U}; - - namespace ix = irods::experimental; - namespace ixm = ix::metadata; - using entity_type = ix::entity::entity_type; - - ixm::avu md_flag {config_.flag, __func__}; - - auto set_flag = (config_.collection_test_flag != ""); - if (set_flag) ixm::set(comm, md_flag, entity_type::collection, config_.collection_test_flag); - irods::at_scope_exit at_exit_ {[&]{ if (set_flag) ixm::remove(comm, md_flag, entity_type::collection, config_.collection_test_flag); }}; - - struct query_failed : public std::runtime_error { - query_failed( const std::string& e = "Query failed to fetch # of jobs active" ) - : std::runtime_error{e} {} - }; - struct job_limit_precision : public std::runtime_error { - job_limit_precision( const std::string& e = "Job Limits may not exceed 32-bit unsigned integer precision" ) - : std::runtime_error{e} {} - }; - - - for (auto path = start_path; ; ++iter) { - const auto s = fsvr::status(comm,path); - bool is_collection = fsvr::is_collection(s); - bool is_data_object = fsvr::is_data_object(s); - if (is_data_object || is_collection) { - try { - std::string resc_name; - if (is_data_object) { - resc_name = get_indexing_resource_name_for_object( - path.string(), - indexing_resources); - } - - if (job_limit > 0 && n_jobs >= job_limit) { - // The job limit parameter should be a large number, in the thousands or more perhaps, but small - // enough so that indexing your largest collections doesn't fill up all of virtual memory. - for(;;) { - query qobj{comm_, JOB_QUERY_STRING, 1}; - for (const auto & row: qobj) { - auto count = std::stol( row[0] ); - if (count > job_max) { throw job_limit_precision{}; } - n_jobs = count; - break; - } - // The approach to throttling is simply to wait until the number of delayed tasks falls - // down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue again. - // Because we're already in a delayed task, this does not impact the plugin's response time. - if (n_jobs > LOW_WATER_MARK) { - sleep(1); - } - else { - break; - } - } - } - - if ( ! (is_collection && _index_type == "full_text" )) { - schedule_policy_event_for_object( - policy_name, - path.string(), - _user_name, - EMPTY_RESOURCE_NAME, - _indexer, - _index_name, - _index_type, - generate_delay_execution_parameters(), - {},{},{}, - {{ "job_category_tag", unique_key }} ); - - ++n_jobs; - } - } - catch(const exception& _e) { - rodsLog( - LOG_ERROR, - "failed to find indexing resource (error code=[%ld]) for object [%s]",static_cast(_e.code()), - path.string().c_str()); - } - catch (const std::runtime_error & e) { - irods::log(LOG_ERROR,fmt::format("Abort indexing collection: {}",e.what())); - break; - } - if (iter != iter_end) { path = iter->path(); } - else { break; } - } // if collection or data object - } // for path - } // schedule_policy_events_for_collection - - /* - == //// void indexer::schedule_{INDEX-TYPE}_{EVENT}_event //// - == for combinations of INDEX-TYPE => ( full_text, metadata ) - == and EVENT => ( indexing, purge ) - == - == Calls schedule_policy_events_given_object_path(...) for the target path - */ - void indexer::schedule_full_text_indexing_event( - const std::string& _object_path, - const std::string& _user_name, - const std::string& _source_resource) { - - schedule_policy_events_given_object_path( - irods::indexing::operation_type::index, - irods::indexing::index_type::full_text, - _object_path, - _user_name, - _source_resource); - - } // schedule_full_text_indexing_event - - void indexer::schedule_full_text_purge_event( - const std::string& _object_path, - const std::string& _user_name) { - schedule_policy_events_given_object_path( - irods::indexing::operation_type::purge, - irods::indexing::index_type::full_text, - _object_path, - _user_name); - } // schedule_full_text_purge_event - - void indexer::schedule_metadata_indexing_event( - const std::string& _object_path, - const std::string& _user_name, - const std::string& _attribute, - const std::string& _value, - const std::string& _units) { - - schedule_policy_events_given_object_path( - irods::indexing::operation_type::index, - irods::indexing::index_type::metadata, - _object_path, - _user_name, - EMPTY_RESOURCE_NAME, - _attribute, - _value, - _units); - - } // schedule_metadata_indexing_event - - void indexer::schedule_metadata_purge_event( - const std::string& _object_path, - const std::string& _user_name, - const std::string& _attribute, - const std::string& _value, - const std::string& _units, - const std::string& opt_id) { - - schedule_policy_events_given_object_path( - irods::indexing::operation_type::purge, - irods::indexing::index_type::metadata, - _object_path, - _user_name, - EMPTY_RESOURCE_NAME, - _attribute, - _value, - _units, - opt_id ); // non-empty if an ID for the deleted object or collection is needed in the task - - } // schedule_metadata_purge_event - - // - Given an object path (data object or collection) and an operation type, - // - ascend collection hierarchy to find the indices for which policy events - // - must be scheduled on the given object path - // - - void indexer::schedule_policy_events_given_object_path( - const std::string& _operation_type, - const std::string& _index_type, - const std::string& _object_path, - const std::string& _user_name, - const std::string& _source_resource, - const std::string& _attribute, - const std::string& _value, - const std::string& _units, - const std::string& _opt_ID ) { - - using fsp = irods::experimental::filesystem::path; - namespace fsvr = irods::experimental::filesystem::server; - auto is_data_object = false; - fsp full_path{_object_path}; - - const auto s = fsvr::status(*rei_->rsComm,full_path); - if (fsvr::is_data_object(s)) { - is_data_object = true; - const auto indexing_resources = get_indexing_resource_names(); - if(!resource_is_indexable(_source_resource, indexing_resources)) { - rodsLog( - LOG_ERROR, - "resource [%s] is not indexable for object [%s]", - _source_resource.c_str(), - _object_path.c_str()); - return; - } - } - - std::vector processed_indicies; - auto coll = (is_data_object ? full_path.parent_path() : full_path); - while(!coll.empty()) { - try { - auto metadata = - get_metadata_for_collection( - coll.string(), - config_.index); - for(const auto& row : metadata) { - const auto& indexer_string = row.first; - const auto& indexer = row.second; - std::string index_name, index_type; - std::tie(index_name, index_type) = parse_indexer_string(indexer_string); - if(_index_type == index_type) { - auto itr = std::find( - std::begin(processed_indicies), - std::end(processed_indicies), - index_name+index_type); - if(itr != std::end(processed_indicies)) { - continue; - } - const auto policy_name = operation_and_index_types_to_policy_name( - _operation_type, - _index_type); - schedule_policy_event_for_object( - policy_name, - _object_path, - _user_name, - _source_resource, - indexer, - index_name, - index_type, - generate_delay_execution_parameters(), - _attribute, - _value, - _units, - {{ "_obj_optional_ID", _opt_ID}} // _extra_options - ); - processed_indicies.push_back(index_name+index_type); - } - } // for row - } - catch(const irods::exception&) { - } - if (0 == coll.compare(coll.root_collection())) { break; } - coll = coll.parent_path(); - - } // while - - } // schedule_policy_events_given_object_path - - std::string indexer::generate_delay_execution_parameters() { - std::string params{config_.delay_parameters + "" + config_.instance_name_ + ""}; - - int min_time = config_.minimum_delay_time <= 0 ? 1 : config_.minimum_delay_time; - int max_time = config_.maximum_delay_time <= 0 ? 30 : config_.maximum_delay_time; - - std::string sleep_time{"1"}; - try { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> dis(min_time, max_time); - sleep_time = boost::lexical_cast(dis(gen)); - } - catch(const boost::bad_lexical_cast&) {} - - params += ""+sleep_time+"s"; - - rodsLog( - config_.log_level, - "irods::storage_tiering :: delay params min [%d] max [%d] computed [%s]", - min_time, - max_time, - params.c_str()); - - return params; - - } // generate_delay_execution_parameters - - void indexer::get_metadata_for_data_object( - const std::string& _meta_attr_name, - const std::string& _object_path, - std::string& _value, - std::string& _unit ) { - boost::filesystem::path p{_object_path}; - std::string coll_name = p.parent_path().string(); - std::string data_name = p.filename().string(); - - std::string query_str { - boost::str( - boost::format("SELECT META_DATA_ATTR_VALUE WHERE META_DATA_ATTR_NAME = '%s' and DATA_NAME = '%s' AND COLL_NAME = '%s'") % - _meta_attr_name % - data_name % - coll_name) }; - query qobj{comm_, query_str, 1}; - if(qobj.size() == 0) { - THROW( - CAT_NO_ROWS_FOUND, - boost::format("no results found for object [%s] with attribute [%s]") % - _object_path % - _meta_attr_name); - } - - _value = qobj.front()[0]; - _unit = qobj.front()[1]; - } // get_metadata_for_data_object - - indexer::metadata_results indexer::get_metadata_for_collection( - const std::string& _collection, - const std::string& _meta_attr_name) { - - std::string query_str { - boost::str( - boost::format("SELECT META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS WHERE META_COLL_ATTR_NAME = '%s' and COLL_NAME = '%s'") % - _meta_attr_name % - _collection) }; - query qobj{comm_, query_str}; - if(qobj.size() == 0) { - THROW( - CAT_NO_ROWS_FOUND, - boost::format("no results found for collection [%s] with attribute [%s]") % - _collection % - _meta_attr_name); - } - - metadata_results ret_val; - for(const auto& row : qobj) { - ret_val.push_back(std::make_pair(row[0], row[1])); - } - - return ret_val; - } // get_metadata_for_collection - - void indexer::schedule_policy_event_for_object( - const std::string& _event, - const std::string& _object_path, - const std::string& _user_name, - const std::string& _source_resource, - const std::string& _indexer, - const std::string& _index_name, - const std::string& _index_type, - const std::string& _data_movement_params, - const std::string& _attribute, - const std::string& _value, - const std::string& _units, - const nlohmann::json & _extra_options // = {} - ) { - - using json = nlohmann::json; - - const auto & [_ID_bool, _obj_optional_ID] = kws_get(_extra_options, "_obj_optional_ID"); - const auto & [_tag_bool, job_category_tag] = kws_get(_extra_options, "job_category_tag"); - - json rule_obj; - rule_obj["rule-engine-operation"] = _event; - rule_obj["rule-engine-instance-name"] = config_.instance_name_; - rule_obj["object-path"] = _ID_bool && ! (*_obj_optional_ID).empty() ? *_obj_optional_ID : _object_path; - rule_obj["user-name"] = _user_name; - rule_obj["indexer"] = _indexer; - rule_obj["index-name"] = _index_name; - rule_obj["index-type"] = _index_type; - rule_obj["source-resource"] = _source_resource; - rule_obj["attribute"] = _attribute; - rule_obj["value"] = _value; - rule_obj["units"] = _units; - if (_tag_bool && job_category_tag) { - rule_obj["job-category-tag"] = *job_category_tag; - } - - const auto delay_err = _delayExec( - rule_obj.dump().c_str(), - "", - _data_movement_params.c_str(), - rei_); - if(delay_err < 0) { - THROW( - delay_err, - boost::format("queue indexing event failed for object [%s] indexer [%s] type [%s]") % - _object_path % - _indexer % - _index_type); - } - - rodsLog( - config_.log_level, - "irods::indexing::indexer indexing object [%s] with [%s] type [%s]", - _object_path.c_str(), - _indexer.c_str(), - _index_type.c_str()); - - } // schedule_policy_event_for_object - } // namespace indexing -}; // namespace irods +using namespace std::string_literals; +int _delayExec(const char* inActionCall, + const char* recoveryActionCall, + const char* delayCondition, + ruleExecInfo_t* rei); + +namespace irods::indexing +{ + indexer::indexer(ruleExecInfo_t* _rei, const std::string& _instance_name) + : rei_(_rei) + , comm_(_rei->rsComm) + , config_(_instance_name) + { + } // indexer + + // -=-=-=-= Launch a new delayed task to execute index or purge operation + // - + void indexer::schedule_indexing_policy(const std::string& _json, const std::string& _params) + { + const int delay_err = _delayExec(_json.c_str(), "", _params.c_str(), rei_); + if (delay_err < 0) { + THROW(delay_err, "delayExec failed"); + } + } // schedule_indexing_policy + + // -=-=-=-= Does the given AVU exist on the collection of the given name? + // - + bool indexer::metadata_exists_on_collection(const std::string& _collection_name, + const std::string& _attribute, + const std::string& _value, + const std::string& _units) + { + try { + std::string query_str{boost::str( + boost::format("SELECT META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS WHERE META_COLL_ATTR_NAME = '%s' " + "and COLL_NAME = '%s'") % + _attribute % _collection_name)}; + query qobj{rei_->rsComm, query_str, 1}; + + if (qobj.size() == 0) { + return false; + } + + for (auto results : qobj) { + if (results[0] == _value && results[1] == _units) { + return true; + } + } + + return false; + } + catch (irods::exception& _e) { + return false; + } + } // metadata_exists_on_collection + + void indexer::schedule_collection_operation(const std::string& _operation_type, + const std::string& _collection_name, + const std::string& _user_name, + const std::string& _indexer_string, + const std::string& _indexer) + { + rodsLog(config_.log_level, + "irods::indexing::collection indexing collection [%s] with [%s] type [%s]", + _collection_name.c_str(), + _indexer_string.c_str(), + _indexer.c_str()); + + std::string index_name, index_type; + std::tie(index_name, index_type) = parse_indexer_string(_indexer_string); + const auto policy_name = _operation_type == irods::indexing::operation_type::index + ? irods::indexing::policy::collection::index + : irods::indexing::policy::collection::purge; + using json = nlohmann::json; + json rule_obj; + rule_obj["rule-engine-operation"] = policy_name; + rule_obj["rule-engine-instance-name"] = config_.instance_name; + rule_obj["collection-name"] = _collection_name; + rule_obj["user-name"] = _user_name; + rule_obj["indexer"] = _indexer; + rule_obj["index-name"] = index_name; + rule_obj["index-type"] = index_type; + + const auto delay_err = + _delayExec(rule_obj.dump().c_str(), "", generate_delay_execution_parameters().c_str(), rei_); + if (delay_err < 0) { + THROW(delay_err, + boost::format("queue collection indexing failed for [%s] indexer [%s] type [%s]") % _collection_name % + _indexer % index_type); + } + + rodsLog(config_.log_level, + "irods::indexing::collection indexing collection [%s] with [%s] type [%s]", + _collection_name.c_str(), + _indexer.c_str(), + index_type.c_str()); + } // schedule_collection_operation + + std::vector indexer::get_indexing_resource_names() + { + std::string query_str{boost::str( + boost::format("SELECT RESC_NAME WHERE META_RESC_ATTR_NAME = '%s' AND META_RESC_ATTR_VALUE = 'true'") % + config_.index)}; + + query qobj{comm_, query_str}; + std::vector ret_val; + for (const auto& row : qobj) { + ret_val.push_back(row[0]); + } + + return ret_val; + + } // get_indexing_resource_names + + std::string indexer::get_indexing_resource_name_for_object(const std::string _object_path, + std::vector _resource_names) + { + boost::filesystem::path p{_object_path}; + std::string coll_name = p.parent_path().string(); + std::string data_name = p.filename().string(); + + std::string query_str{boost::str(boost::format("SELECT RESC_NAME WHERE DATA_NAME = '%s' AND COLL_NAME = '%s'") % + data_name % coll_name)}; + query qobj{comm_, query_str, 1}; + if (qobj.size() == 0) { + THROW(CAT_NO_ROWS_FOUND, boost::format("no resource names found for object [%s]") % _object_path); + } + + if (_resource_names.empty()) { + return qobj.front()[0]; + } + + for (const auto& resource_name_for_object : qobj) { + for (const auto& resource_name_for_indexing : _resource_names) { + if (resource_name_for_object[0] == resource_name_for_indexing) { + return resource_name_for_object[0]; + } + } + } + + THROW( + SYS_INVALID_INPUT_PARAM, boost::format("failed to find indexing resource for object [%s]") % _object_path); + + } // get_indexing_resource_name_for_object + + bool indexer::resource_is_indexable(const std::string _source_resource, std::vector _resource_names) + { + if (_source_resource == EMPTY_RESOURCE_NAME || _resource_names.empty()) { + return true; + } + + for (const auto& resource_name_for_indexing : _resource_names) { + if (_source_resource == resource_name_for_indexing) { + return true; + } + } + + return false; + } // resource_is_indexable + + void indexer::schedule_metadata_purge_for_recursive_rm_object(const std::string& logical_path, + const nlohmann::json& recurse_info) + { + schedule_policy_event_for_object( + /*policy_name, */ "irods_policy_recursive_rm_object_by_path", + logical_path, + /*_user_name,*/ "", + EMPTY_RESOURCE_NAME, + /* _indexer, */ "elasticsearch", + recurse_info.dump(), + /* _index_type, */ "metadata", + generate_delay_execution_parameters()); + } + + // - Starting at _collection_name , recurse over every sub-element of the tree + // - (including data objects and collections and starting with the root). + // - Call schedule_policy_event_for_object for every object or collection + void indexer::schedule_policy_events_for_collection(const std::string& _operation_type, + const std::string& _collection_name, + const std::string& _user_name, + const std::string& _indexer, + const std::string& _index_name, + const std::string& _index_type) + { + namespace fs = irods::experimental::filesystem; + namespace fsvr = irods::experimental::filesystem::server; + using fsp = fs::path; + rsComm_t& comm = *rei_->rsComm; + + const auto indexing_resources = get_indexing_resource_names(); + const auto policy_name = operation_and_index_types_to_policy_name(_operation_type, _index_type); + fsp start_path{_collection_name}; + + int job_limit = config_.job_limit; + int job_max = std::numeric_limits::max(); + + if (job_limit <= 0) { + job_limit = job_max; // 0 is the advertised default + if (job_limit < 0) { // negative value could result from a value > INT_MAX + irods::log(LOG_WARNING, + fmt::format(R"Qu(Parameter "job_limit_per_collection_indexing_operation" is )Qu" + "too large or negative, clipped to: {}", + job_max)); + } + } + + auto iter_end = fsvr::recursive_collection_iterator{}; + auto iter = fsvr::recursive_collection_iterator{comm, start_path}; + + const std::hash string_hasher; + const auto key1 = string_hasher(_collection_name); + const auto key2 = string_hasher(config_.instance_name); + + const std::string unique_key = std::to_string(key1) + "-" + std::to_string(key2); + + static const auto double_quote = R"(")"s; + const auto JOB_QUERY_STRING = + fmt::format(R"(SELECT count(RULE_EXEC_ID) WHERE RULE_EXEC_NAME like '%"{}"%')", unique_key); + const auto LOW_WATER_MARK{job_limit / 2}; + auto n_jobs{job_limit * 0U}; + + namespace ix = irods::experimental; + namespace ixm = ix::metadata; + using entity_type = ix::entity::entity_type; + + ixm::avu md_flag{config_.flag, __func__}; + + auto set_flag = (config_.collection_test_flag != ""); + if (set_flag) { + ixm::set(comm, md_flag, entity_type::collection, config_.collection_test_flag); + } + + irods::at_scope_exit at_exit_{[&] { + if (set_flag) { + ixm::remove(comm, md_flag, entity_type::collection, config_.collection_test_flag); + } + }}; + + struct query_failed : public std::runtime_error + { + query_failed(const std::string& e = "Query failed to fetch # of jobs active") + : std::runtime_error{e} + { + } + }; + + struct job_limit_precision : public std::runtime_error + { + job_limit_precision(const std::string& e = "Job Limits may not exceed 32-bit unsigned integer precision") + : std::runtime_error{e} + { + } + }; + + for (auto path = start_path;; ++iter) { + const auto s = fsvr::status(comm, path); + bool is_collection = fsvr::is_collection(s); + bool is_data_object = fsvr::is_data_object(s); + if (is_data_object || is_collection) { + try { + std::string resc_name; + if (is_data_object) { + resc_name = get_indexing_resource_name_for_object(path.string(), indexing_resources); + } + + if (job_limit > 0 && n_jobs >= job_limit) { + // The job limit parameter should be a large number, in the thousands or more perhaps, but + // small enough so that indexing your largest collections doesn't fill up all of virtual + // memory. + while (true) { + query qobj{comm_, JOB_QUERY_STRING, 1}; + for (const auto& row : qobj) { + auto count = std::stol(row[0]); + if (count > job_max) { + throw job_limit_precision{}; + } + n_jobs = count; + break; + } + + // The approach to throttling is simply to wait until the number of delayed tasks falls + // down to the LOW_WATER_MARK and then exit the wait loop to fill up the task queue + // again. Because we're already in a delayed task, this does not impact the plugin's + // response time. + if (n_jobs > LOW_WATER_MARK) { + sleep(1); + } + else { + break; + } + } + } + + if (!(is_collection && _index_type == "full_text")) { + schedule_policy_event_for_object(policy_name, + path.string(), + _user_name, + EMPTY_RESOURCE_NAME, + _indexer, + _index_name, + _index_type, + generate_delay_execution_parameters(), + {}, + {}, + {}, + {{"job_category_tag", unique_key}}); + + ++n_jobs; + } + } + catch (const exception& _e) { + rodsLog(LOG_ERROR, + "failed to find indexing resource (error code=[%ld]) for object [%s]", + static_cast(_e.code()), + path.string().c_str()); + } + catch (const std::runtime_error& e) { + irods::log(LOG_ERROR, fmt::format("Abort indexing collection: {}", e.what())); + break; + } + + if (iter != iter_end) { + path = iter->path(); + } + else { + break; + } + } + } + } + + /* + == //// void indexer::schedule_{INDEX-TYPE}_{EVENT}_event //// + == for combinations of INDEX-TYPE => ( full_text, metadata ) + == and EVENT => ( indexing, purge ) + == + == Calls schedule_policy_events_given_object_path(...) for the target path + */ + void indexer::schedule_full_text_indexing_event(const std::string& _object_path, + const std::string& _user_name, + const std::string& _source_resource) + { + schedule_policy_events_given_object_path(irods::indexing::operation_type::index, + irods::indexing::index_type::full_text, + _object_path, + _user_name, + _source_resource); + } // schedule_full_text_indexing_event + + void indexer::schedule_full_text_purge_event(const std::string& _object_path, const std::string& _user_name) + { + schedule_policy_events_given_object_path( + irods::indexing::operation_type::purge, irods::indexing::index_type::full_text, _object_path, _user_name); + } // schedule_full_text_purge_event + + void indexer::schedule_metadata_indexing_event(const std::string& _object_path, + const std::string& _user_name, + const std::string& _attribute, + const std::string& _value, + const std::string& _units) + { + schedule_policy_events_given_object_path(irods::indexing::operation_type::index, + irods::indexing::index_type::metadata, + _object_path, + _user_name, + EMPTY_RESOURCE_NAME, + _attribute, + _value, + _units); + } // schedule_metadata_indexing_event + + void indexer::schedule_metadata_purge_event(const std::string& _object_path, + const std::string& _user_name, + const std::string& _attribute, + const std::string& _value, + const std::string& _units, + const std::string& opt_id) + { + // "opt_id" is non-empty if an ID for the deleted object or collection is needed in the task. + schedule_policy_events_given_object_path(irods::indexing::operation_type::purge, + irods::indexing::index_type::metadata, + _object_path, + _user_name, + EMPTY_RESOURCE_NAME, + _attribute, + _value, + _units, + opt_id); + } // schedule_metadata_purge_event + + // - Given an object path (data object or collection) and an operation type, + // - ascend collection hierarchy to find the indices for which policy events + // - must be scheduled on the given object path + void indexer::schedule_policy_events_given_object_path(const std::string& _operation_type, + const std::string& _index_type, + const std::string& _object_path, + const std::string& _user_name, + const std::string& _source_resource, + const std::string& _attribute, + const std::string& _value, + const std::string& _units, + const std::string& _opt_ID) + { + using fsp = irods::experimental::filesystem::path; + namespace fsvr = irods::experimental::filesystem::server; + auto is_data_object = false; + fsp full_path{_object_path}; + + const auto s = fsvr::status(*rei_->rsComm, full_path); + if (fsvr::is_data_object(s)) { + is_data_object = true; + const auto indexing_resources = get_indexing_resource_names(); + if (!resource_is_indexable(_source_resource, indexing_resources)) { + rodsLog(LOG_ERROR, + "resource [%s] is not indexable for object [%s]", + _source_resource.c_str(), + _object_path.c_str()); + return; + } + } + + std::vector processed_indicies; + auto coll = (is_data_object ? full_path.parent_path() : full_path); + while (!coll.empty()) { + try { + auto metadata = get_metadata_for_collection(coll.string(), config_.index); + for (const auto& row : metadata) { + const auto& indexer_string = row.first; + const auto& indexer = row.second; + std::string index_name, index_type; + std::tie(index_name, index_type) = parse_indexer_string(indexer_string); + if (_index_type == index_type) { + auto itr = std::find( + std::begin(processed_indicies), std::end(processed_indicies), index_name + index_type); + if (itr != std::end(processed_indicies)) { + continue; + } + const auto policy_name = operation_and_index_types_to_policy_name(_operation_type, _index_type); + schedule_policy_event_for_object(policy_name, + _object_path, + _user_name, + _source_resource, + indexer, + index_name, + index_type, + generate_delay_execution_parameters(), + _attribute, + _value, + _units, + {{"_obj_optional_ID", _opt_ID}} // _extra_options + ); + processed_indicies.push_back(index_name + index_type); + } + } // for row + } + catch (const irods::exception&) { + } + + if (0 == coll.compare(coll.root_collection())) { + break; + } + + coll = coll.parent_path(); + } + } // schedule_policy_events_given_object_path + + std::string indexer::generate_delay_execution_parameters() + { + std::string params{config_.delay_parameters + "" + config_.instance_name + ""}; + + int min_time = config_.minimum_delay_time <= 0 ? 1 : config_.minimum_delay_time; + int max_time = config_.maximum_delay_time <= 0 ? 30 : config_.maximum_delay_time; + + std::string sleep_time{"1"}; + try { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(min_time, max_time); + sleep_time = boost::lexical_cast(dis(gen)); + } + catch (const boost::bad_lexical_cast&) { + } + + params += "" + sleep_time + "s"; + + rodsLog(config_.log_level, + "irods::storage_tiering :: delay params min [%d] max [%d] computed [%s]", + min_time, + max_time, + params.c_str()); + + return params; + + } // generate_delay_execution_parameters + + void indexer::get_metadata_for_data_object(const std::string& _meta_attr_name, + const std::string& _object_path, + std::string& _value, + std::string& _unit) + { + boost::filesystem::path p{_object_path}; + std::string coll_name = p.parent_path().string(); + std::string data_name = p.filename().string(); + + std::string query_str{boost::str(boost::format("SELECT META_DATA_ATTR_VALUE WHERE META_DATA_ATTR_NAME = " + "'%s' and DATA_NAME = '%s' AND COLL_NAME = '%s'") % + _meta_attr_name % data_name % coll_name)}; + query qobj{comm_, query_str, 1}; + if (qobj.size() == 0) { + THROW( + CAT_NO_ROWS_FOUND, + boost::format("no results found for object [%s] with attribute [%s]") % _object_path % _meta_attr_name); + } + + _value = qobj.front()[0]; + _unit = qobj.front()[1]; + } // get_metadata_for_data_object + + indexer::metadata_results indexer::get_metadata_for_collection(const std::string& _collection, + const std::string& _meta_attr_name) + { + std::string query_str{boost::str(boost::format("SELECT META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS WHERE " + "META_COLL_ATTR_NAME = '%s' and COLL_NAME = '%s'") % + _meta_attr_name % _collection)}; + query qobj{comm_, query_str}; + if (qobj.size() == 0) { + THROW(CAT_NO_ROWS_FOUND, + boost::format("no results found for collection [%s] with attribute [%s]") % _collection % + _meta_attr_name); + } + + metadata_results ret_val; + for (const auto& row : qobj) { + ret_val.push_back(std::make_pair(row[0], row[1])); + } + + return ret_val; + } // get_metadata_for_collection + + void indexer::schedule_policy_event_for_object(const std::string& _event, + const std::string& _object_path, + const std::string& _user_name, + const std::string& _source_resource, + const std::string& _indexer, + const std::string& _index_name, + const std::string& _index_type, + const std::string& _data_movement_params, + const std::string& _attribute, + const std::string& _value, + const std::string& _units, + const nlohmann::json& _extra_options) + { + using json = nlohmann::json; + + const auto& [id_bool, obj_optional_ID] = kws_get(_extra_options, "_obj_optional_ID"); + const auto& [tag_bool, job_category_tag] = kws_get(_extra_options, "job_category_tag"); + + json rule_obj; + rule_obj["rule-engine-operation"] = _event; + rule_obj["rule-engine-instance-name"] = config_.instance_name; + rule_obj["object-path"] = id_bool && !(*obj_optional_ID).empty() ? *obj_optional_ID : _object_path; + rule_obj["user-name"] = _user_name; + rule_obj["indexer"] = _indexer; + rule_obj["index-name"] = _index_name; + rule_obj["index-type"] = _index_type; + rule_obj["source-resource"] = _source_resource; + rule_obj["attribute"] = _attribute; + rule_obj["value"] = _value; + rule_obj["units"] = _units; + if (tag_bool && job_category_tag) { + rule_obj["job-category-tag"] = *job_category_tag; + } + + const auto delay_err = _delayExec(rule_obj.dump().c_str(), "", _data_movement_params.c_str(), rei_); + if (delay_err < 0) { + THROW(delay_err, + boost::format("queue indexing event failed for object [%s] indexer [%s] type [%s]") % _object_path % + _indexer % _index_type); + } + + rodsLog(config_.log_level, + "irods::indexing::indexer indexing object [%s] with [%s] type [%s]", + _object_path.c_str(), + _indexer.c_str(), + _index_type.c_str()); + + } // schedule_policy_event_for_object +} // namespace irods::indexing diff --git a/indexing_utilities.hpp b/indexing_utilities.hpp index 9acfaae..758b915 100644 --- a/indexing_utilities.hpp +++ b/indexing_utilities.hpp @@ -1,139 +1,120 @@ +#ifndef IRODS_CAPABILITY_INDEXING_INDEXING_UTILITIES_HPP +#define IRODS_CAPABILITY_INDEXING_INDEXING_UTILITIES_HPP +#include "configuration.hpp" -#ifndef INDEXING_UTILITIES_HPP -#define INDEXING_UTILITIES_HPP +#include +#include -#include #include -#include - #include -#include -#include "configuration.hpp" -namespace irods { - namespace indexing { - class indexer { - - public: - indexer( - ruleExecInfo_t* _rei, - const std::string& _instance_name); - - void schedule_metadata_purge_for_recursive_rm_object( const std::string& logical_path, - const nlohmann::json & recurse_info); - - void schedule_indexing_policy( - const std::string& _json, - const std::string& _params); - - bool metadata_exists_on_collection( - const std::string& _collection_name, - const std::string& _attribute, - const std::string& _value, - const std::string& _units ); - - void schedule_collection_operation( - const std::string& _operation, - const std::string& _object_path, - const std::string& _user_name, - const std::string& _indexer_string, - const std::string& _indexer); - - void schedule_policy_events_for_collection( - const std::string& _policy_name, - const std::string& _object_path, - const std::string& _user_name, - const std::string& _indexer, - const std::string& _indexer_name, - const std::string& _indexer_type); - - void schedule_full_text_indexing_event( - const std::string& _object_path, - const std::string& _user_name, - const std::string& _source_resource); - - void schedule_full_text_purge_event( - const std::string& _object_path, - const std::string& _user_name); - - void schedule_metadata_indexing_event( - const std::string& _object_path, - const std::string& _user_name, - const std::string& _attribute = {}, - const std::string& _value = {}, - const std::string& _units = {}); - - void schedule_metadata_purge_event( - const std::string& _object_path, - const std::string& _user_name, - const std::string& _attribute = {}, - const std::string& _value = {}, - const std::string& _units = {}, - const std::string& opt_id = {}); - - private: - std::string generate_delay_execution_parameters(); - - void schedule_policy_events_given_object_path( - const std::string& _operation_type, - const std::string& _index_type, - const std::string& _object_path, - const std::string& _user_name, - const std::string& _source_resource = {}, - const std::string& _attribute = {}, - const std::string& _value = {}, - const std::string& _units = {}, - const std::string& opt_id = {}); - - void get_metadata_for_data_object( - const std::string& _meta_attr_name, - const std::string& _object_path, - std::string& _value, - std::string& _unit ); - - using metadata_results = std::vector>; - metadata_results - get_metadata_for_collection( - const std::string& _meta_attr_name, - const std::string& _collection_name); - - void schedule_policy_event_for_object( - const std::string& _event, - const std::string& _object_path, - const std::string& _user_name, - const std::string& _source_resource, - const std::string& _indexer, - const std::string& _index_name, - const std::string& _index_type, - const std::string& _data_movement_params, - const std::string& _attribute = {}, - const std::string& _value = {}, - const std::string& _units = {}, - const nlohmann::json & _extra_options = {} - ); - - std::vector get_indexing_resource_names(); - - std::string get_indexing_resource_name_for_object( - const std::string _object_path, - std::vector _resource_names); - - bool resource_is_indexable( - const std::string _source_resource, - std::vector _resource_names); - - // Attributes - ruleExecInfo_t*rei_; - rsComm_t* comm_; - configuration config_; - - const std::string EMPTY_RESOURCE_NAME{"EMPTY_RESOURCE_NAME"}; - - public: const configuration& get_config() { return config_; } - }; // class indexer - } // namespace indexing -} // namespace irods - -#endif // INDEXING_UTILITIES_HPP +#include +namespace irods::indexing +{ + class indexer + { + public: + indexer(ruleExecInfo_t* _rei, const std::string& _instance_name); + + void schedule_metadata_purge_for_recursive_rm_object(const std::string& logical_path, + const nlohmann::json& recurse_info); + + void schedule_indexing_policy(const std::string& _json, const std::string& _params); + + bool metadata_exists_on_collection(const std::string& _collection_name, + const std::string& _attribute, + const std::string& _value, + const std::string& _units); + + void schedule_collection_operation(const std::string& _operation, + const std::string& _object_path, + const std::string& _user_name, + const std::string& _indexer_string, + const std::string& _indexer); + + void schedule_policy_events_for_collection(const std::string& _policy_name, + const std::string& _object_path, + const std::string& _user_name, + const std::string& _indexer, + const std::string& _indexer_name, + const std::string& _indexer_type); + + void schedule_full_text_indexing_event(const std::string& _object_path, + const std::string& _user_name, + const std::string& _source_resource); + + void schedule_full_text_purge_event(const std::string& _object_path, const std::string& _user_name); + + void schedule_metadata_indexing_event(const std::string& _object_path, + const std::string& _user_name, + const std::string& _attribute = {}, + const std::string& _value = {}, + const std::string& _units = {}); + + void schedule_metadata_purge_event(const std::string& _object_path, + const std::string& _user_name, + const std::string& _attribute = {}, + const std::string& _value = {}, + const std::string& _units = {}, + const std::string& opt_id = {}); + + private: + std::string generate_delay_execution_parameters(); + + void schedule_policy_events_given_object_path(const std::string& _operation_type, + const std::string& _index_type, + const std::string& _object_path, + const std::string& _user_name, + const std::string& _source_resource = {}, + const std::string& _attribute = {}, + const std::string& _value = {}, + const std::string& _units = {}, + const std::string& opt_id = {}); + + void get_metadata_for_data_object(const std::string& _meta_attr_name, + const std::string& _object_path, + std::string& _value, + std::string& _unit); + + using metadata_results = std::vector>; + metadata_results get_metadata_for_collection(const std::string& _meta_attr_name, + const std::string& _collection_name); + + void schedule_policy_event_for_object(const std::string& _event, + const std::string& _object_path, + const std::string& _user_name, + const std::string& _source_resource, + const std::string& _indexer, + const std::string& _index_name, + const std::string& _index_type, + const std::string& _data_movement_params, + const std::string& _attribute = {}, + const std::string& _value = {}, + const std::string& _units = {}, + const nlohmann::json& _extra_options = {}); + + std::vector get_indexing_resource_names(); + + std::string get_indexing_resource_name_for_object(const std::string _object_path, + std::vector _resource_names); + + bool resource_is_indexable(const std::string _source_resource, std::vector _resource_names); + + // Attributes + ruleExecInfo_t* rei_; + rsComm_t* comm_; + configuration config_; + + const std::string EMPTY_RESOURCE_NAME{"EMPTY_RESOURCE_NAME"}; + + public: + const configuration& get_config() + { + return config_; + } + }; // class indexer +} // namespace irods::indexing + +#endif // IRODS_CAPABILITY_INDEXING_INDEXING_UTILITIES_HPP diff --git a/irods_consortium_continuous_integration_build_hook.py b/irods_consortium_continuous_integration_build_hook.py index e6e5098..edc463f 100644 --- a/irods_consortium_continuous_integration_build_hook.py +++ b/irods_consortium_continuous_integration_build_hook.py @@ -15,12 +15,10 @@ def add_cmake_to_front_of_path(): def install_building_dependencies(externals_directory): externals_list = [ - 'irods-externals-boost1.78.0-0', + 'irods-externals-boost1.81.0-0', 'irods-externals-clang-runtime13.0.0-0', 'irods-externals-clang13.0.0-0', 'irods-externals-cmake3.21.4-0', - "irods-externals-cpr1.3.0-1", - "irods-externals-elasticlientd68e30e3-0", 'irods-externals-json3.10.4-0' ] if externals_directory == 'None' or externals_directory is None: @@ -49,6 +47,7 @@ def install_os_specific_dependencies(): 'Centos': install_os_specific_dependencies_yum, 'Debian gnu_linux': install_os_specific_dependencies_apt, 'Opensuse ': install_os_specific_dependencies_yum, + 'Rocky linux': install_os_specific_dependencies_yum, 'Ubuntu': install_os_specific_dependencies_apt } try: diff --git a/irods_consortium_continuous_integration_test_hook.py b/irods_consortium_continuous_integration_test_hook.py index 1838630..eda28da 100644 --- a/irods_consortium_continuous_integration_test_hook.py +++ b/irods_consortium_continuous_integration_test_hook.py @@ -12,7 +12,7 @@ def Indexing_PackageName_Regex( package_ext, technology = 'elasticsearch' ): tech = re.escape(technology) ext = re.escape(package_ext) return re.compile( - r'irods-rule-engine-plugin-(document-type|{tech}|indexing)[-_][0-9].*\.{ext}$'.format(**locals()) + r'irods-rule-engine-plugin-({tech}|indexing)[-_][0-9].*\.{ext}$'.format(**locals()) ) def get_matching_packages(directory,ext): @@ -48,6 +48,7 @@ def get_build_prerequisites(): 'Centos': get_build_prerequisites_yum, 'Debian gnu_linux': get_build_prerequisites_apt, 'Opensuse': get_build_prerequisites_zypper, + 'Rocky linux': get_build_prerequisites_yum, 'Ubuntu': get_build_prerequisites_apt } try: diff --git a/libirods_rule_engine_plugin-document_type.cpp b/libirods_rule_engine_plugin-document_type.cpp deleted file mode 100644 index d02f9fe..0000000 --- a/libirods_rule_engine_plugin-document_type.cpp +++ /dev/null @@ -1,263 +0,0 @@ - -#define IRODS_IO_TRANSPORT_ENABLE_SERVER_SIDE_API -#define IRODS_QUERY_ENABLE_SERVER_SIDE_API -#include -#include -#include -#include "utilities.hpp" -#include "plugin_specific_configuration.hpp" -#include "configuration.hpp" -#include -#include - -#include -#include - -namespace { - struct configuration { - std::string instance_name_; - std::vector hosts_; - int bulk_count_{100}; - int read_size_{4194304}; - configuration(const std::string& _instance_name) : - instance_name_{_instance_name} { - try { - auto cfg = irods::indexing::get_plugin_specific_configuration(_instance_name); - if(cfg.find("hosts") != cfg.end()) { - std::vector host_list = boost::any_cast>(cfg.at("hosts")); - for( auto& i : host_list) { - hosts_.push_back(boost::any_cast(i)); - } - } - - if(cfg.find("bulk_count") != cfg.end()) { - bulk_count_ = boost::any_cast(cfg.at("bulk_count")); - } - - if(cfg.find("read_size") != cfg.end()) { - bulk_count_ = boost::any_cast(cfg.at("read_size")); - } - } - catch(const boost::bad_any_cast& _e) { - THROW( - INVALID_ANY_CAST, - _e.what()); - } - }// ctor - }; // configuration - - std::unique_ptr config; - std::string document_type_index_policy; - - void invoke_document_type_indexing_event( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _source_resource, - std::string* _document_type) { - using ids = irods::experimental::io::idstream; - - (*_document_type) = "text"; - } // invoke_document_type_indexing_event - -} // namespace - -irods::error start( - irods::default_re_ctx&, - const std::string& _instance_name ) { - RuleExistsHelper::Instance()->registerRuleRegex("irods_policy_.*"); - config = std::make_unique(_instance_name); - document_type_index_policy = irods::indexing::policy::compose_policy_name( - irods::indexing::policy::prefix, - "document_type_elastic"); - return SUCCESS(); -} - -irods::error stop( - irods::default_re_ctx&, - const std::string& ) { - return SUCCESS(); -} - -irods::error rule_exists( - irods::default_re_ctx&, - const std::string& _rn, - bool& _ret) { - _ret = document_type_index_policy == _rn; - return SUCCESS(); -} - -irods::error list_rules( - irods::default_re_ctx&, - std::vector& _rules) { - _rules.push_back(document_type_index_policy); - return SUCCESS(); -} - -irods::error exec_rule( - irods::default_re_ctx&, - const std::string& _rn, - std::list& _args, - irods::callback _eff_hdlr) { - ruleExecInfo_t* rei{}; - const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); - if(!err.ok()) { - return err; - } - - try { - // Extract parameters from args - auto it = _args.begin(); - const std::string object_path{ boost::any_cast(*it) }; ++it; - const std::string source_resource{ boost::any_cast(*it) }; ++it; - std::string* document_type{ boost::any_cast(*it) }; ++it; - - invoke_document_type_indexing_event( - rei, - object_path, - source_resource, - document_type); - } - catch(const std::invalid_argument& _e) { - irods::indexing::exception_to_rerror( - SYS_NOT_SUPPORTED, - _e.what(), - rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const boost::bad_any_cast& _e) { - irods::indexing::exception_to_rerror( - INVALID_ANY_CAST, - _e.what(), - rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const irods::exception& _e) { - irods::indexing::exception_to_rerror( - _e, - rei->rsComm->rError); - return irods::error(_e); - } - - return err; - -} // exec_rule - -irods::error exec_rule_text( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback ) { - return ERROR( - RULE_ENGINE_CONTINUE, - "exec_rule_text is not supported"); -} // exec_rule_text - -irods::error exec_rule_expression( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback) { - return ERROR( - RULE_ENGINE_CONTINUE, - "exec_rule_expression is not supported"); -} // exec_rule_expression - -extern "C" -irods::pluggable_rule_engine* plugin_factory( - const std::string& _inst_name, - const std::string& _context ) { - irods::pluggable_rule_engine* re = - new irods::pluggable_rule_engine( - _inst_name, - _context); - - re->add_operation< - irods::default_re_ctx&, - const std::string&>( - "start", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&)>(start)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&>( - "stop", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&)>(stop)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - bool&>( - "rule_exists", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - bool&)>(rule_exists)); - - re->add_operation< - irods::default_re_ctx&, - std::vector&>( - "list_rules", - std::function< - irods::error( - irods::default_re_ctx&, - std::vector&)>(list_rules)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - std::list&, - irods::callback>( - "exec_rule", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - std::list&, - irods::callback)>(exec_rule)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback>( - "exec_rule_text", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback)>(exec_rule_text)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback>( - "exec_rule_expression", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback)>(exec_rule_expression)); - return re; - -} // plugin_factory - - - - diff --git a/libirods_rule_engine_plugin-elasticsearch.cpp b/libirods_rule_engine_plugin-elasticsearch.cpp index 2cb15e1..b66f4d7 100644 --- a/libirods_rule_engine_plugin-elasticsearch.cpp +++ b/libirods_rule_engine_plugin-elasticsearch.cpp @@ -1,913 +1,863 @@ +#include "configuration.hpp" +#include "plugin_specific_configuration.hpp" +#include "utilities.hpp" -#define IRODS_IO_TRANSPORT_ENABLE_SERVER_SIDE_API -#define IRODS_QUERY_ENABLE_SERVER_SIDE_API -#include +#include +#include +#include #include #include -#include "utilities.hpp" -#include "plugin_specific_configuration.hpp" -#include "configuration.hpp" -#include +#include #include -#include -#include -#include -#include +#define IRODS_QUERY_ENABLE_SERVER_SIDE_API +#include + +#define IRODS_IO_TRANSPORT_ENABLE_SERVER_SIDE_API +#include #include + #define IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API #include -#include -#include -#include -#include -#include -#include - +#include #include #include -#include -#include -#include -#include -#include + #include -#include #include +#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include -namespace { - - using HTTPMethod = elasticlient::Client::HTTPMethod; - - namespace ElasticSearch { - - cpr::Response index (const std::string & version, - elasticlient::Client &cl, - const std::string & index_name, - const std::string & mapping_type, - const std::string & doc_id, - const std::string & body) - { - if (version < "7.") - return cl.index (index_name, mapping_type, doc_id, body); - else - return cl.performRequest (HTTPMethod::PUT, - index_name + "/_doc/" + doc_id, - body); - } - - cpr::Response remove (const std::string & version, - elasticlient::Client &cl, - const std::string & index_name, - const std::string & mapping_type, - const std::string & doc_id) - { - if (version < "7.") - return cl.remove (index_name, mapping_type, doc_id); - else - return cl.performRequest (HTTPMethod::DELETE, - index_name + "/_doc/" + doc_id, - ""); - } - - } // namespace ElasticSearch - - using string_t = std::string; - - struct configuration : irods::indexing::configuration { - std::vector hosts_; - int bulk_count_{10}; - int read_size_{4194304}; - std::string es_version_{"7."}; - configuration(const std::string& _instance_name) : - irods::indexing::configuration(_instance_name) { - try { - auto cfg = irods::indexing::get_plugin_specific_configuration(_instance_name); - if(cfg.find("hosts") != cfg.end()) { - nlohmann::json host_list = cfg.at("hosts"); - for( auto& i : host_list) { - hosts_.push_back(i.get()); - } - } - - if(cfg.find("es_version") != cfg.end()) { - es_version_ = cfg.at("es_version").get(); - } - - if(cfg.find("bulk_count") != cfg.end()) { - bulk_count_ = cfg.at("bulk_count").get(); - } - - if(cfg.find("read_size") != cfg.end()) { - bulk_count_ = cfg.at("read_size").get(); - } - } - catch(const std::exception& _e) { - THROW( - USER_INPUT_OPTION_ERR, _e.what()); - } - }// ctor - }; // configuration - - std::unique_ptr config; - std::string object_index_policy; - std::string object_purge_policy; - std::string metadata_index_policy; - std::string metadata_purge_policy; - - void apply_document_type_policy( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _source_resource, - std::string* _document_type) { - - std::list args; - args.push_back(boost::any(_object_path)); - args.push_back(boost::any(_source_resource)); - args.push_back(boost::any(_document_type)); - std::string policy_name = irods::indexing::policy::compose_policy_name( - irods::indexing::policy::prefix, - "document_type_elastic"); - irods::indexing::invoke_policy(_rei, policy_name, args); - - } // apply_document_type_policy - - void log_fcn(elasticlient::LogLevel, const std::string& _msg) { - rodsLog(LOG_DEBUG, "ELASTICLIENT :: [%s]", _msg.c_str()); - } // log_fcn - - std::string generate_id() { - using namespace boost::archive::iterators; - std::stringstream os; - typedef - base64_from_binary< // convert binary values to base64 characters - transform_width - > - base64_text; // compose all the above operations in to a new iterator - - boost::uuids::uuid uuid{boost::uuids::random_generator()()}; - std::string uuid_str = boost::uuids::to_string(uuid); - std::copy( - base64_text(uuid_str.c_str()), - base64_text(uuid_str.c_str() + uuid_str.size()), - ostream_iterator(os)); - - return os.str(); - } // generate_id - - std::string get_object_index_id( - ruleExecInfo_t* _rei, - const std::string& _object_path, - bool *iscoll = nullptr - ) { - boost::filesystem::path p{_object_path}; - std::string coll_name = p.parent_path().string(); - std::string data_name = p.filename().string(); - namespace fs = irods::experimental::filesystem; - namespace fsvr = irods::experimental::filesystem::server; - std::string query_str; - if (fsvr::is_collection( *_rei->rsComm, fs::path{_object_path} )) { - if (iscoll) { *iscoll = true; } - query_str = boost::str( boost::format("SELECT COLL_ID WHERE COLL_NAME = '%s'") - % _object_path ); - } - else { - if (iscoll) { *iscoll = false; } - query_str = boost::str( boost::format("SELECT DATA_ID WHERE DATA_NAME = '%s' AND COLL_NAME = '%s'") - % data_name - % coll_name ); - } - try { - irods::query qobj{_rei->rsComm, query_str, 1}; - if(qobj.size() > 0) { - return qobj.front()[0]; - } - THROW( - CAT_NO_ROWS_FOUND, - boost::format("failed to get object id for [%s]") - % _object_path); - } - catch(const irods::exception& _e) { - THROW( - CAT_NO_ROWS_FOUND, - boost::format("failed to get object id for [%s]") - % _object_path); - } - - } // get_object_index_id - - void get_metadata_for_object_index_id( - ruleExecInfo_t* _rei, - std::string _obj_id, - bool _is_coll, - std::optional & _out - ) - { - if (!_out || !_out->is_array()) _out = nlohmann::json::array(); - auto & avus_out = *_out; - const std::string query_str = _is_coll ? - fmt::format("SELECT META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS" - " WHERE COLL_ID = '{}' ", _obj_id) : - fmt::format("SELECT META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS" - " WHERE DATA_ID = '{}' ", _obj_id); - irods::query qobj{_rei->rsComm, query_str}; - for (const auto & row : qobj) { - if (row[0] == config->index) continue; - avus_out += { { "attribute", row[0] }, - { "value", row[1] }, - { "unit", row[2] } }; - } - } // get_metadata_for_object_index_id - - void update_object_metadata( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _attribute, - const std::string& _value, - const std::string& _units ) { - modAVUMetadataInp_t set_op{ - .arg0 = "set", - .arg1 = "-d", - .arg2 = const_cast(_object_path.c_str()), - .arg3 = const_cast(_attribute.c_str()), - .arg4 = const_cast(_value.c_str()), - .arg5 = const_cast(_units.c_str())}; - - auto status = rsModAVUMetadata(_rei->rsComm, &set_op); - if(status < 0) { - THROW( - status, - boost::format("failed to update object [%s] metadata") - % _object_path); - } - } // update_object_metadata - - void invoke_indexing_event_full_text( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _source_resource, - const std::string& _index_name) { - - try { - std::string doc_type{"text"}; - apply_document_type_policy( - _rei, - _object_path, - _source_resource, - &doc_type); - - const long read_size{config->read_size_}; - const int bulk_count{config->bulk_count_}; - const std::string object_id{get_object_index_id(_rei, _object_path)}; - - std::shared_ptr client = - std::make_shared( - config->hosts_); - elasticlient::Bulk bulkIndexer(client); - elasticlient::SameIndexBulkData bulk(_index_name, bulk_count); - - char read_buff[read_size]; - irods::experimental::io::server::basic_transport xport(*_rei->rsComm); - irods::experimental::io::idstream ds{xport, _object_path}; - - int chunk_counter{0}; - bool need_final_perform{false}; - while(ds) { - ds.read(read_buff, read_size); - std::string data{read_buff}; - - // filter out new line characters - data.erase( - std::remove_if( - data.begin(), - data.end(), - [](wchar_t c) {return (std::iscntrl(c) || c == '"' || c == '\'' || c == '\\');}), - data.end()); - - std::string index_id{ - boost::str( - boost::format( - "%s_%d") - % object_id - % chunk_counter)}; - ++chunk_counter; - - std::string payload{ - boost::str( - boost::format( - "{ \"absolutePath\" : \"%s\", \"data\" : \"%s\" }") - % _object_path - % data)}; - - need_final_perform = true; - bool done = bulk.indexDocument(doc_type, index_id, payload.data()); - if(done) { - need_final_perform = false; - // have reached bulk_count chunks - auto error_count = bulkIndexer.perform(bulk); - if(error_count > 0) { - rodsLog( - LOG_ERROR, - "Encountered %d errors when indexing [%s]", - error_count, - _object_path.c_str()); - } - bulk.clear(); - } - } // while - - if(need_final_perform) { - auto error_count = bulkIndexer.perform(bulk); - if(error_count > 0) { - rodsLog( - LOG_ERROR, - "Encountered %d errors when indexing [%s]", - error_count, - _object_path.c_str()); - } - bulk.clear(); - } - } - catch(const irods::exception& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - auto irods_error = _e.code(); - if (irods_error != CAT_NO_ROWS_FOUND) { - THROW( - irods_error, - _e.what()); - } - } - catch(const std::runtime_error& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - catch(const std::exception& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - } // invoke_indexing_event_full_text - - void invoke_purge_event_full_text( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _source_resource, - const std::string& _index_name) { - - try { - std::string doc_type{"text"}; - apply_document_type_policy( - _rei, - _object_path, - _source_resource, - &doc_type); - - const long read_size{config->read_size_}; - const int bulk_count{config->bulk_count_}; - const std::string object_id{get_object_index_id(_rei, _object_path)}; - elasticlient::Client client{config->hosts_}; - - int chunk_counter{0}; - - bool done{false}; - while(!done) { - std::string index_id{ - boost::str( - boost::format( - "%s_%d") - % object_id - % chunk_counter)}; - ++chunk_counter; - const cpr::Response response = client.remove(_index_name, doc_type, index_id); - if(response.status_code != 200) { - done = true; - if(response.status_code == 404) { // meaningful for logging - rodsLog (LOG_NOTICE, boost::str(boost::format("elasticlient 404: no index entry for chunk (%d) of object_id '%d' " - "in index '%s'") % chunk_counter % object_id % _index_name).c_str()); - } - } - } // while - } - catch(const std::runtime_error& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - catch(const irods::exception& _e) { - if (_e.code() == CAT_NO_ROWS_FOUND) { - return; - } - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - catch(const std::exception& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - } // invoke_purge_event_full_text - - std::string get_metadata_index_id( - const std::string& _index_id, - const std::string& _attribute, - const std::string& _value, - const std::string& _units) { - - std::string str = _attribute + - _value + - _units; - irods::Hasher hasher; - irods::getHasher( irods::MD5_NAME, hasher ); - hasher.update(str); - - std::string digest; - hasher.digest(digest); - - return _index_id + irods::indexing::indexer_separator + digest; - - } // get_metadata_index_id - - void invoke_indexing_event_metadata( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _attribute, - const std::string& _value, - const std::string& _unit, - const std::string& _index_name, - nlohmann::json & obj_meta ) { - - try { - bool is_coll{}; - elasticlient::Client client{config->hosts_}; - auto object_id = get_object_index_id( _rei, _object_path, &is_coll); - - std::optional jsonarray; - get_metadata_for_object_index_id( _rei, object_id, is_coll, jsonarray ); - if (!jsonarray) { - irods::log( LOG_WARNING, fmt::format("In {}, function {}: Aborted indexing metadata, null AVU array returned for object [{}]", - __FILE__, __func__,_object_path)); - return; - } - obj_meta ["metadataEntries"] = *jsonarray; - - const cpr::Response response = ElasticSearch::index(config->es_version_, client, _index_name, "text", object_id, obj_meta.dump()); - - if(response.status_code != 200 && response.status_code != 201) { - THROW( - SYS_INTERNAL_ERR, - boost::format("failed to index metadata [%s] [%s] [%s] for [%s] code [%d] message [%s]") - % _attribute - % _value - % _unit - % _object_path - % response.status_code - % response.text); - } - } - catch(const irods::exception& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - auto irods_error = _e.code(); - if (irods_error != CAT_NO_ROWS_FOUND) { - THROW( - irods_error, - _e.what()); - } - } - catch(const std::runtime_error& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - catch(const std::exception& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - } // invoke_indexing_event_metadata - - void invoke_purge_event_metadata( - ruleExecInfo_t* _rei, - const std::string& _object_path, - const std::string& _attribute, - const std::string& _value, - const std::string& _unit, - const std::string& _index_name, const nlohmann::json & = {} ) - { - - try { - elasticlient::Client client{config->hosts_}; - namespace fsvr = irods::experimental::filesystem; - // we now accept object id or path here, so pep_api_rm_coll_post can purge - std::string object_id { - fsvr::path{_object_path}.is_absolute() ? get_object_index_id( _rei, _object_path) - : _object_path - }; - const cpr::Response response = ElasticSearch::remove(config->es_version_, client, _index_name, "text", object_id); - switch(response.status_code) { - // either the index has been deleted, or the AVU was cleared unexpectedly - case 404: - rodsLog (LOG_NOTICE, boost::str(boost::format("elasticlient 404: no index entry for AVU (%s,%s,%s) on object '%s' in " - "index '%s'") % _attribute % _value % _unit % _object_path % _index_name).c_str()); - break; - // routinely expected return codes ( not logged ): - case 200: break; - case 201: break; - // unexpected return codes: - default: - THROW( - SYS_INTERNAL_ERR, - boost::format("failed to index metadata [%s] [%s] [%s] for [%s] code [%d] message [%s]") - % _attribute - % _value - % _unit - % _object_path - % response.status_code - % response.text); - } - } - catch(const std::runtime_error& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - catch(const std::exception& _e) { - rodsLog( - LOG_ERROR, - "Exception [%s]", - _e.what()); - THROW( - SYS_INTERNAL_ERR, - _e.what()); - } - - } // invoke_purge_event_metadata +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + namespace beast = boost::beast; + namespace http = beast::http; + + using http_response = std::optional>; + using json = nlohmann::json; + using string_t = std::string; + + struct configuration : irods::indexing::configuration + { + std::vector hosts; + int bulk_count{10}; + int read_size{4194304}; + std::string es_version{"7."}; // TODO Not used. + + configuration(const std::string& _instance_name) + : irods::indexing::configuration(_instance_name) + { + try { + auto cfg = irods::indexing::get_plugin_specific_configuration(_instance_name); + + if (auto iter = cfg.find("hosts"); iter != cfg.end()) { + for (auto& i : *iter) { + hosts.push_back(i.get()); + } + } + else { + THROW(USER_INPUT_OPTION_ERR, fmt::format("{}: elasticsearch: [hosts] cannot be empty", __func__)); + } + + if (auto iter = cfg.find("es_version"); iter != cfg.end()) { + es_version = iter->get(); + } + + if (auto iter = cfg.find("bulk_count"); iter != cfg.end()) { + bulk_count = iter->get(); + if (bulk_count <= 0) { + THROW(USER_INPUT_OPTION_ERR, + fmt::format( + "{}: elasticsearch: Invalid value [{}] for [bulk_count]", __func__, bulk_count)); + } + } + + if (auto iter = cfg.find("read_size"); iter != cfg.end()) { + read_size = iter->get(); + if (read_size <= 0) { + THROW( + USER_INPUT_OPTION_ERR, + fmt::format("{}: elasticsearch: Invalid value [{}] for [read_size]", __func__, read_size)); + } + } + } + catch (const std::exception& _e) { + THROW(USER_INPUT_OPTION_ERR, _e.what()); + } + } + }; // struct configuration + + std::unique_ptr config; + std::string object_index_policy; + std::string object_purge_policy; + std::string metadata_index_policy; + std::string metadata_purge_policy; + + auto send_http_request(http::verb _verb, const std::string_view _target, const std::string_view _body = "") + -> http_response + { + for (auto&& host : config->hosts) { + if (host.empty()) { + rodsLog(LOG_ERROR, "%s: empty service URL.", __func__); + continue; + } + + namespace urls = boost::urls; + + urls::result result = urls::parse_uri(host); + if (!result) { + rodsLog(LOG_ERROR, fmt::format("{}: could not parse service URL [{}].", __func__, host).c_str()); + continue; + } + + const auto use_tls = (result->has_scheme() && result->scheme_id() == urls::scheme::https); + + namespace net = boost::asio; + using tcp = net::ip::tcp; + + // This lambda encapsulates the HTTP logic that's independent of the type of stream. + const auto construct_and_send_http_request = [_verb, _target, _body](auto& _stream) { + http::request req{_verb, _target, 11}; + req.set(http::field::host, boost::asio::ip::host_name()); + req.set(http::field::user_agent, "iRODS Indexing Plugin/" IRODS_PLUGIN_VERSION); + req.set(http::field::content_type, "application/json"); + + if (!_body.empty()) { + req.body() = _body; + + std::stringstream ss; + ss << req; + const auto s = ss.str(); + + if (s.size() > 256) { + rodsLog(LOG_DEBUG, + fmt::format("{}: sending request = (truncated) [{} ...]", __func__, s.substr(0, 256)) + .c_str()); + } + else { + rodsLog(LOG_DEBUG, fmt::format("{}: sending request = [{}]", __func__, s).c_str()); + } + + req.prepare_payload(); + } + + http::write(_stream, req); + + beast::flat_buffer buffer; + http::response res; + http::read(_stream, buffer, res); + + std::stringstream ss; + ss << res; + rodsLog(LOG_DEBUG, fmt::format("{}: elasticsearch response = [{}]", __func__, ss.str()).c_str()); + + return res; + }; + + try { + net::io_context ioc; + + tcp::resolver resolver{ioc}; + const auto results = resolver.resolve(result->host(), result->port()); + + if (use_tls) { + net::ssl::context tls_ctx{net::ssl::context::tlsv12_client}; + tls_ctx.set_default_verify_paths(); + tls_ctx.set_verify_mode(net::ssl::verify_peer); + + beast::ssl_stream stream{ioc, tls_ctx}; + + // Set SNI hostname (many hosts need this to handshake successfully). + if (!::SSL_set_tlsext_host_name(stream.native_handle(), result->host().c_str())) { + beast::error_code ec{static_cast(::ERR_get_error()), net::error::get_ssl_category()}; + throw beast::system_error{ec}; + } + + beast::get_lowest_layer(stream).connect(results); + + auto res = construct_and_send_http_request(stream); + + beast::error_code ec; + stream.shutdown(ec); + + if (net::error::eof == ec) { + // Rationale: + // http://stackoverflow.com/questions/25587403/boost-asio-ssl-async-shutdown-always-finishes-with-an-error + ec = {}; + } + + if (ec) { + throw beast::system_error{ec}; + } + + return res; + } + else { + beast::tcp_stream stream{ioc}; + stream.connect(results); + + auto res = construct_and_send_http_request(stream); + + beast::error_code ec; + stream.socket().shutdown(tcp::socket::shutdown_both, ec); + + // not_connected happens sometimes, so don't bother reporting it. + if (ec && ec != beast::errc::not_connected) { + throw beast::system_error{ec}; + } + + return res; + } + } + catch (const std::exception& e) { + rodsLog(LOG_ERROR, fmt::format("{}: {}", __func__, e.what()).c_str()); + } + } + + return std::nullopt; + } // send_http_request + + std::string generate_id() + { + using namespace boost::archive::iterators; + std::stringstream os; + typedef base64_from_binary< // convert binary values to base64 characters + transform_width< // retrieve 6 bit integers from a sequence of 8 bit bytes + const char*, + 6, + 8>> + base64_text; // compose all the above operations in to a new iterator + + boost::uuids::uuid uuid{boost::uuids::random_generator()()}; + std::string uuid_str = boost::uuids::to_string(uuid); + std::copy( + base64_text(uuid_str.c_str()), base64_text(uuid_str.c_str() + uuid_str.size()), ostream_iterator(os)); + + return os.str(); + } // generate_id + + std::string get_object_index_id(ruleExecInfo_t* _rei, const std::string& _object_path, bool* iscoll = nullptr) + { + boost::filesystem::path p{_object_path}; + std::string coll_name = p.parent_path().string(); + std::string data_name = p.filename().string(); + namespace fs = irods::experimental::filesystem; + namespace fsvr = irods::experimental::filesystem::server; + std::string query_str; + if (fsvr::is_collection(*_rei->rsComm, fs::path{_object_path})) { + if (iscoll) { + *iscoll = true; + } + query_str = fmt::format("SELECT COLL_ID WHERE COLL_NAME = '{}'", _object_path); + } + else { + if (iscoll) { + *iscoll = false; + } + query_str = fmt::format("SELECT DATA_ID WHERE COLL_NAME = '{}' AND DATA_NAME = '{}'", coll_name, data_name); + } + try { + irods::query qobj{_rei->rsComm, query_str, 1}; + if (qobj.size() > 0) { + return qobj.front()[0]; + } + THROW(CAT_NO_ROWS_FOUND, boost::format("failed to get object id for [%s]") % _object_path); + } + catch (const irods::exception& _e) { + THROW(CAT_NO_ROWS_FOUND, boost::format("failed to get object id for [%s]") % _object_path); + } + + } // get_object_index_id + + void get_metadata_for_object_index_id(ruleExecInfo_t* _rei, + std::string _obj_id, + bool _is_coll, + std::optional& _out) + { + if (!_out || !_out->is_array()) { + _out = nlohmann::json::array(); + } + + auto& avus_out = *_out; + const std::string query_str = + _is_coll ? fmt::format("SELECT META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS" + " WHERE COLL_ID = '{}' ", + _obj_id) + : fmt::format("SELECT META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS" + " WHERE DATA_ID = '{}' ", + _obj_id); + irods::query qobj{_rei->rsComm, query_str}; + for (const auto& row : qobj) { + if (row[0] == config->index) + continue; + avus_out += {{"attribute", row[0]}, {"value", row[1]}, {"unit", row[2]}}; + } + } // get_metadata_for_object_index_id + + void update_object_metadata(ruleExecInfo_t* _rei, + const std::string& _object_path, + const std::string& _attribute, + const std::string& _value, + const std::string& _units) + { + modAVUMetadataInp_t set_op{.arg0 = "set", + .arg1 = "-d", + .arg2 = const_cast(_object_path.c_str()), + .arg3 = const_cast(_attribute.c_str()), + .arg4 = const_cast(_value.c_str()), + .arg5 = const_cast(_units.c_str())}; + + auto status = rsModAVUMetadata(_rei->rsComm, &set_op); + if (status < 0) { + THROW(status, boost::format("failed to update object [%s] metadata") % _object_path); + } + } // update_object_metadata + + void invoke_indexing_event_full_text(ruleExecInfo_t* _rei, + const std::string& _object_path, + const std::string& _source_resource, + const std::string& _index_name) + { + try { + const std::string object_id = get_object_index_id(_rei, _object_path); + std::vector buffer(config->read_size); + irods::experimental::io::server::basic_transport xport(*_rei->rsComm); + irods::experimental::io::idstream in{xport, _object_path}; + + int chunk_counter{0}; + bool need_final_perform{false}; + std::stringstream ss; + + while (in) { + in.read(buffer.data(), buffer.size()); + + // The indexing instruction. + // clang-format off + ss << json{{"index", { + {"_id", fmt::format("{}_{}", object_id, chunk_counter++)} + }}}.dump() << '\n'; + // clang-format on + + // The defaults for the .dump() member function. + constexpr int indent = -1; + constexpr char indent_char = ' '; + constexpr bool ensure_ascii = false; + + // The data to index. + // The version of .dump() invoked here instructs the library to ignore + // invalid UTF-8 sequences. All bytes are copied to the output unchanged. + // clang-format off + ss << json{ + {"absolutePath", _object_path}, + {"data", std::string_view(buffer.data(), in.gcount())} + }.dump(indent, indent_char, ensure_ascii, json::error_handler_t::ignore) << '\n'; + // clang-format on + + // Send bulk request if chunk counter has reached bulk limit. + if (chunk_counter == config->bulk_count) { + chunk_counter = 0; + + const auto res = send_http_request(http::verb::post, _index_name + "/_bulk", ss.str()); + + if (!res.has_value()) { + rodsLog(LOG_ERROR, "%s: No response from elasticsearch host.", __func__); + } + else if (res->result_int() / 100 != 2) { + rodsLog(LOG_ERROR, + "%s: Error sending request to elasticsearch host. [http_status_code=[%d]]", + __func__, + res->result_int()); + } + + ss.str(""); + } + } + + if (chunk_counter > 0) { + // Elasticsearch limits the maximum size of a HTTP request to 100mb. + // See https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html. + const auto res = send_http_request(http::verb::post, _index_name + "/_bulk", ss.str()); + + if (!res.has_value()) { + rodsLog(LOG_ERROR, "%s: No response from elasticsearch host.", __func__); + } + else if (res->result_int() / 100 != 2) { + rodsLog(LOG_ERROR, + "%s: Error sending request to elasticsearch host. [http_status_code=[%d]]", + __func__, + res->result_int()); + } + } + } + catch (const irods::exception& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + auto irods_error = _e.code(); + if (irods_error != CAT_NO_ROWS_FOUND) { + THROW(irods_error, _e.what()); + } + } + catch (const std::runtime_error& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + catch (const std::exception& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + } // invoke_indexing_event_full_text + + void invoke_purge_event_full_text(ruleExecInfo_t* _rei, + const std::string& _object_path, + const std::string& _source_resource, + const std::string& _index_name) + { + try { + const std::string object_id{get_object_index_id(_rei, _object_path)}; + int chunk_counter{0}; + + while (true) { + const auto index_entry = fmt::format("{}/_doc/{}_{}", _index_name, object_id, chunk_counter++); + const auto response = send_http_request(http::verb::delete_, index_entry); + + if (!response.has_value()) { + rodsLog(LOG_ERROR, + "%s: No response from elasticsearch host. Index entry [%s] may not have been purged", + __func__, + index_entry.c_str()); + break; + } + + // Some objects will be split into multiple chunks. Because we don't track them, + // the only way to know when all chunks have been processed is to send requests until + // we receive a HTTP status code of 404. Here, we expand the status code range to + // anything other than 200. + if (response->result_int() != 200) { + if (response->result_int() == 404) { // meaningful for logging + rodsLog(LOG_NOTICE, + fmt::format("{}: No index entry for chunk [{}] of object_id [{}] in index [{}]", + __func__, + chunk_counter, + object_id, + _index_name) + .c_str()); + } + + break; + } + } + } + catch (const std::runtime_error& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + catch (const irods::exception& _e) { + if (_e.code() == CAT_NO_ROWS_FOUND) { + return; + } + THROW(SYS_INTERNAL_ERR, _e.what()); + } + catch (const std::exception& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + } // invoke_purge_event_full_text + + std::string get_metadata_index_id(const std::string& _index_id, + const std::string& _attribute, + const std::string& _value, + const std::string& _units) + { + std::string str = _attribute + _value + _units; + irods::Hasher hasher; + irods::getHasher(irods::MD5_NAME, hasher); + hasher.update(str); + + std::string digest; + hasher.digest(digest); + + return _index_id + irods::indexing::indexer_separator + digest; + + } // get_metadata_index_id + + void invoke_indexing_event_metadata(ruleExecInfo_t* _rei, + const std::string& _object_path, + const std::string& _attribute, + const std::string& _value, + const std::string& _unit, + const std::string& _index_name, + nlohmann::json& obj_meta) + { + try { + bool is_coll{}; + auto object_id = get_object_index_id(_rei, _object_path, &is_coll); + + std::optional jsonarray; + get_metadata_for_object_index_id(_rei, object_id, is_coll, jsonarray); + if (!jsonarray) { + irods::log(LOG_WARNING, + fmt::format( + "In {}, function {}: Aborted indexing metadata, null AVU array returned for object [{}]", + __FILE__, + __func__, + _object_path)); + return; + } + obj_meta["metadataEntries"] = *jsonarray; + + const auto response = + send_http_request(http::verb::put, fmt::format("{}/_doc/{}", _index_name, object_id), obj_meta.dump()); + + if (!response.has_value()) { + THROW(SYS_INTERNAL_ERR, + fmt::format("failed to index metadata [{}] [{}] [{}] for [{}]. No response.", + _attribute, + _value, + _unit, + _object_path)); + } + + if (response->result_int() != 200 && response->result_int() != 201) { + THROW(SYS_INTERNAL_ERR, + fmt::format("failed to index metadata [{}] [{}] [{}] for [{}] code [{}] message [{}]", + _attribute, + _value, + _unit, + _object_path, + response->result_int(), + response->body())); + } + } + catch (const irods::exception& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + auto irods_error = _e.code(); + if (irods_error != CAT_NO_ROWS_FOUND) { + THROW(irods_error, _e.what()); + } + } + catch (const std::runtime_error& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + catch (const std::exception& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + } // invoke_indexing_event_metadata + + void invoke_purge_event_metadata(ruleExecInfo_t* _rei, + const std::string& _object_path, + const std::string& _attribute, + const std::string& _value, + const std::string& _unit, + const std::string& _index_name, + const nlohmann::json& = {}) + { + try { + namespace fs = irods::experimental::filesystem; + + // we now accept object id or path here, so pep_api_rm_coll_post can purge + const auto object_id = + fs::path{_object_path}.is_absolute() ? get_object_index_id(_rei, _object_path) : _object_path; + + const auto response = + send_http_request(http::verb::delete_, fmt::format("{}/_doc/{}", _index_name, object_id)); + + if (!response.has_value()) { + auto msg = fmt::format("{}: No response from elasticsearch host.", __func__); + rodsLog(LOG_ERROR, msg.c_str()); + THROW(SYS_INTERNAL_ERR, std::move(msg)); + } + + switch (response->result_int()) { + // either the index has been deleted, or the AVU was cleared unexpectedly + case 404: + rodsLog(LOG_NOTICE, + fmt::format("received HTTP status code of 404: no index entry for AVU ({}, {}, {}) on " + "object [{}] in index [{}]", + _attribute, + _value, + _unit, + _object_path, + _index_name) + .c_str()); + break; + // routinely expected return codes ( not logged ): + case 200: + case 201: + break; + // unexpected return codes: + default: + THROW(SYS_INTERNAL_ERR, + fmt::format("failed to index metadata [{}] [{}] [{}] for [{}] code [{}] message [{}]", + _attribute, + _value, + _unit, + _object_path, + response->result_int(), + response->body())); + } + } + catch (const std::runtime_error& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + catch (const std::exception& _e) { + rodsLog(LOG_ERROR, "Exception [%s]", _e.what()); + THROW(SYS_INTERNAL_ERR, _e.what()); + } + } // invoke_purge_event_metadata + + irods::error start(irods::default_re_ctx&, const std::string& _instance_name) + { + RuleExistsHelper::Instance()->registerRuleRegex("irods_policy_.*"); + + try { + config = std::make_unique(_instance_name); + } + catch (const irods::exception& e) { + return e; + } + + object_index_policy = + irods::indexing::policy::compose_policy_name(irods::indexing::policy::object::index, "elasticsearch"); + object_purge_policy = + irods::indexing::policy::compose_policy_name(irods::indexing::policy::object::purge, "elasticsearch"); + metadata_index_policy = + irods::indexing::policy::compose_policy_name(irods::indexing::policy::metadata::index, "elasticsearch"); + metadata_purge_policy = + irods::indexing::policy::compose_policy_name(irods::indexing::policy::metadata::purge, "elasticsearch"); + + return SUCCESS(); + } + + irods::error stop(irods::default_re_ctx&, const std::string&) + { + return SUCCESS(); + } + + irods::error rule_exists(irods::default_re_ctx&, const std::string& _rn, bool& _ret) + { + _ret = "irods_policy_recursive_rm_object_by_path" == _rn || object_index_policy == _rn || + object_purge_policy == _rn || metadata_index_policy == _rn || metadata_purge_policy == _rn; + return SUCCESS(); + } + + irods::error list_rules(irods::default_re_ctx&, std::vector& _rules) + { + _rules.push_back(object_index_policy); + _rules.push_back(object_purge_policy); + _rules.push_back(metadata_index_policy); + _rules.push_back(metadata_purge_policy); + return SUCCESS(); + } + + irods::error exec_rule(irods::default_re_ctx&, + const std::string& _rn, + std::list& _args, + irods::callback _eff_hdlr) + { + ruleExecInfo_t* rei{}; + const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); + + if (!err.ok()) { + return err; + } + + using nlohmann::json; + try { + if (_rn == object_index_policy) { + auto it = _args.begin(); + const std::string object_path{boost::any_cast(*it)}; + ++it; + const std::string source_resource{boost::any_cast(*it)}; + ++it; + const std::string index_name{boost::any_cast(*it)}; + ++it; + + invoke_indexing_event_full_text(rei, object_path, source_resource, index_name); + } + else if (_rn == object_purge_policy) { + auto it = _args.begin(); + const std::string object_path{boost::any_cast(*it)}; + ++it; + const std::string source_resource{boost::any_cast(*it)}; + ++it; + const std::string index_name{boost::any_cast(*it)}; + ++it; + + invoke_purge_event_full_text(rei, object_path, source_resource, index_name); + } + else if (_rn == metadata_index_policy || _rn == metadata_purge_policy) { + auto it = _args.begin(); + const std::string object_path{boost::any_cast(*it)}; + ++it; + const std::string attribute{boost::any_cast(*it)}; + ++it; + const std::string value{boost::any_cast(*it)}; + ++it; + const std::string unit{boost::any_cast(*it)}; + ++it; + const std::string index_name{boost::any_cast(*it)}; + ++it; + + std::string obj_meta_str = "{}"; + + if (it != _args.end()) { + obj_meta_str = boost::any_cast(*it++); + } + + json obj_meta = nlohmann::json::parse(obj_meta_str); + + // purge with AVU by name? + if (_rn == metadata_purge_policy && attribute.empty()) { + // delete the indexed entry + invoke_purge_event_metadata(rei, object_path, attribute, value, unit, index_name); + } + else { + // update the indexed entry + invoke_indexing_event_metadata(rei, object_path, attribute, value, unit, index_name, obj_meta); + } + } + else if (_rn == "irods_policy_recursive_rm_object_by_path") { + auto it = _args.begin(); + const std::string the_path{boost::any_cast(*it)}; + std::advance(it, 2); + const json recurse_info = json::parse(boost::any_cast(*it)); + + const auto escaped_path = [p = the_path]() mutable { + boost::replace_all(p, "\\", "\\\\"); + boost::replace_all(p, "?", "\\?"); + boost::replace_all(p, "*", "\\*"); + return p; + }(); + + std::string JtopLevel = json{{"query", {{"match", {{"absolutePath", escaped_path}}}}}}.dump(); + std::string JsubObject; + + try { + if (recurse_info.at("is_collection").get()) { + JsubObject = + json{{"query", {{"wildcard", {{"absolutePath", {{"value", escaped_path + "/*"}}}}}}}} + .dump(); + } + } + catch (const json::exception& e) { + return ERROR(SYS_LIBRARY_ERROR, e.what()); + } + + try { + for (const auto& e : recurse_info.at("indices")) { + const auto& index_name = e.get_ref(); + + for (const std::string& query_input : {JtopLevel, JsubObject}) { + if (query_input.empty()) { + continue; + } + + const auto response = send_http_request( + http::verb::post, fmt::format("{}/_delete_by_query", index_name), query_input); + + if (!response.has_value()) { + rodsLog(LOG_ERROR, + fmt::format("{}: No response from elasticsearch host.", __func__).c_str()); + continue; + } + + if (response->result_int() != 200) { + rodsLog( + LOG_WARNING, + fmt::format( + "{}: _delete_by_query failed [rule=[{}], path=[{}]", __func__, _rn, the_path) + .c_str()); + } + } + } + } + catch (const nlohmann::json::parse_error& e) { + rodsLog(LOG_ERROR, fmt::format("JSON parse exception : [{}]", e.what()).c_str()); + } + } // "irods_policy_recursive_rm_object_by_path" + else { + return ERROR(SYS_NOT_SUPPORTED, _rn); + } + } + catch (const std::invalid_argument& _e) { + irods::indexing::exception_to_rerror(SYS_NOT_SUPPORTED, _e.what(), rei->rsComm->rError); + return ERROR(SYS_NOT_SUPPORTED, _e.what()); + } + catch (const boost::bad_any_cast& _e) { + irods::indexing::exception_to_rerror(INVALID_ANY_CAST, _e.what(), rei->rsComm->rError); + return ERROR(SYS_NOT_SUPPORTED, _e.what()); + } + catch (const irods::exception& _e) { + irods::indexing::exception_to_rerror(_e, rei->rsComm->rError); + return irods::error(_e); + } + + return err; + } // exec_rule + + irods::error exec_rule_text(irods::default_re_ctx&, + const std::string&, + msParamArray_t*, + const std::string&, + irods::callback) + { + return ERROR(RULE_ENGINE_CONTINUE, "exec_rule_text is not supported"); + } // exec_rule_text + + irods::error exec_rule_expression(irods::default_re_ctx&, const std::string&, msParamArray_t*, irods::callback) + { + return ERROR(RULE_ENGINE_CONTINUE, "exec_rule_expression is not supported"); + } // exec_rule_expression } // namespace -irods::error start( - irods::default_re_ctx&, - const std::string& _instance_name ) { - - RuleExistsHelper::Instance()->registerRuleRegex("irods_policy_.*"); - config = std::make_unique(_instance_name); - object_index_policy = irods::indexing::policy::compose_policy_name( - irods::indexing::policy::object::index, - "elasticsearch"); - object_purge_policy = irods::indexing::policy::compose_policy_name( - irods::indexing::policy::object::purge, - "elasticsearch"); - metadata_index_policy = irods::indexing::policy::compose_policy_name( - irods::indexing::policy::metadata::index, - "elasticsearch"); - metadata_purge_policy = irods::indexing::policy::compose_policy_name( - irods::indexing::policy::metadata::purge, - "elasticsearch"); - - if (getRodsLogLevel() > LOG_NOTICE) { - elasticlient::setLogFunction(log_fcn); - } - return SUCCESS(); -} - -irods::error stop( - irods::default_re_ctx&, - const std::string& ) { - return SUCCESS(); -} - -irods::error rule_exists( - irods::default_re_ctx&, - const std::string& _rn, - bool& _ret) { - _ret = "irods_policy_recursive_rm_object_by_path" == _rn || - object_index_policy == _rn || - object_purge_policy == _rn || - metadata_index_policy == _rn || - metadata_purge_policy == _rn; - return SUCCESS(); -} - -irods::error list_rules( - irods::default_re_ctx&, - std::vector& _rules) { - _rules.push_back(object_index_policy); - _rules.push_back(object_purge_policy); - _rules.push_back(metadata_index_policy); - _rules.push_back(metadata_purge_policy); - return SUCCESS(); -} - -irods::error exec_rule( - irods::default_re_ctx&, - const std::string& _rn, - std::list& _args, - irods::callback _eff_hdlr) { - ruleExecInfo_t* rei{}; - const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); - - if(!err.ok()) { - return err; - } - - using nlohmann::json; - try { - if(_rn == object_index_policy) { - auto it = _args.begin(); - const std::string object_path{ boost::any_cast(*it) }; ++it; - const std::string source_resource{ boost::any_cast(*it) }; ++it; - const std::string index_name{ boost::any_cast(*it) }; ++it; - - invoke_indexing_event_full_text( - rei, - object_path, - source_resource, - index_name); - } - else if(_rn == object_purge_policy) { - auto it = _args.begin(); - const std::string object_path{ boost::any_cast(*it) }; ++it; - const std::string source_resource{ boost::any_cast(*it) }; ++it; - const std::string index_name{ boost::any_cast(*it) }; ++it; - - invoke_purge_event_full_text( - rei, - object_path, - source_resource, - index_name); - } - else if(_rn == metadata_index_policy || _rn == metadata_purge_policy) { - - auto it = _args.begin(); - const std::string object_path{ boost::any_cast(*it) }; ++it; - const std::string attribute{ boost::any_cast(*it) }; ++it; - const std::string value{ boost::any_cast(*it) }; ++it; - const std::string unit{ boost::any_cast(*it) }; ++it; - const std::string index_name{ boost::any_cast(*it) }; ++it; - - std::string obj_meta_str = "{}"; - - if (it != _args.end()) { - obj_meta_str = boost::any_cast(*it++); - } - - json obj_meta = nlohmann::json::parse(obj_meta_str); - - if (_rn == metadata_purge_policy && attribute.empty()) { // purge with AVU by name? - - invoke_purge_event_metadata( // delete the indexed entry - rei, - object_path, - attribute, - value, - unit, - index_name); - } - else { - invoke_indexing_event_metadata( // update the indexed entry - rei, - object_path, - attribute, - value, - unit, - index_name, - obj_meta); - } - - } - else if(_rn == "irods_policy_recursive_rm_object_by_path") { - using nlohmann::json; - auto it = _args.begin(); - const std::string the_path{ boost::any_cast(*it) }; - std::advance( it, 2 ); - const json recurse_info = json::parse(boost::any_cast(*it)); - auto escape = [] (std::string path_) -> std::string { boost::replace_all ( path_, "\\" , "\\\\"); - boost::replace_all ( path_, "?" , "\\?"); - boost::replace_all ( path_, "*" , "\\*"); - return path_;}; - auto escaped_path = escape(the_path); - std::string JtopLevel = json{{"query",{{"match",{{"absolutePath",escaped_path}} }} }}.dump(); - std::string JsubObject{""}; - try { - if (recurse_info["is_collection"].get()) { - JsubObject = json{{"query",{{"wildcard",{{"absolutePath",{{"value",escaped_path+"/*"}} }} }} }}.dump(); - } - } - catch(const std::domain_error & e) { - return ERROR(-1,fmt::format("_delete_by_query - stopped short of performRequest - domain_error: {}",e.what())); - } - elasticlient::Client client { config->hosts_ }; - - try { - rsComm_t& comm = *rei->rsComm; - for (const std::string & e : recurse_info["indices"]) { - const std::string del_by_query_URL { e + "/_delete_by_query" } ; - for (const std::string &json_out: {JtopLevel,JsubObject}) { - if (json_out == "") { continue; } - auto response = client.performRequest( HTTPMethod::POST, del_by_query_URL, json_out); - if(response.status_code != 200) { - irods::log( LOG_WARNING, fmt::format("_delete_by_query - response code not 200" - "\n\t- for path [{}]" - "\n\t- escaped as [{}]" - "\n\t- json request body is [{}]",the_path,escaped_path,json_out)); - } - } - } - } - catch (const elasticlient::ConnectionException & e) { - irods::log(LOG_ERROR, fmt::format("Cannot reach elasticsearch on : [{}]",fmt::join(config->hosts_, ", "))); - } - catch (const nlohmann::json::parse_error & e) { - irods::log(LOG_ERROR, fmt::format("JSON parse exception : [{}]", e.what())); - } - } // "irods_policy_recursive_rm_object_by_path" - else { - return ERROR( - SYS_NOT_SUPPORTED, - _rn); - } - } - catch(const std::invalid_argument& _e) { - irods::indexing::exception_to_rerror( - SYS_NOT_SUPPORTED, - _e.what(), - rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const boost::bad_any_cast& _e) { - irods::indexing::exception_to_rerror( - INVALID_ANY_CAST, - _e.what(), - rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const irods::exception& _e) { - irods::indexing::exception_to_rerror( - _e, - rei->rsComm->rError); - return irods::error(_e); - } - - return err; - -} // exec_rule - -irods::error exec_rule_text( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback ) { - return ERROR( - RULE_ENGINE_CONTINUE, - "exec_rule_text is not supported"); -} // exec_rule_text - -irods::error exec_rule_expression( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback) { - return ERROR( - RULE_ENGINE_CONTINUE, - "exec_rule_expression is not supported"); -} // exec_rule_expression - -extern "C" -irods::pluggable_rule_engine* plugin_factory( - const std::string& _inst_name, - const std::string& _context ) { - irods::pluggable_rule_engine* re = - new irods::pluggable_rule_engine( - _inst_name, - _context); - re->add_operation< - irods::default_re_ctx&, - const std::string&>( - "start", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&)>(start)); - re->add_operation< - irods::default_re_ctx&, - const std::string&>( - "stop", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&)>(stop)); - re->add_operation< - irods::default_re_ctx&, - const std::string&, - bool&>( - "rule_exists", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - bool&)>(rule_exists)); - re->add_operation< - irods::default_re_ctx&, - std::vector&>( - "list_rules", - std::function< - irods::error( - irods::default_re_ctx&, - std::vector&)>(list_rules)); - re->add_operation< - irods::default_re_ctx&, - const std::string&, - std::list&, - irods::callback>( - "exec_rule", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - std::list&, - irods::callback)>(exec_rule)); - re->add_operation< - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback>( - "exec_rule_text", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback)>(exec_rule_text)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback>( - "exec_rule_expression", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback)>(exec_rule_expression)); - return re; - +extern "C" irods::pluggable_rule_engine* plugin_factory(const std::string& _inst_name, + const std::string& _context) +{ + irods::pluggable_rule_engine* re = + new irods::pluggable_rule_engine(_inst_name, _context); + re->add_operation( + "start", std::function(start)); + re->add_operation( + "stop", std::function(stop)); + re->add_operation( + "rule_exists", std::function(rule_exists)); + re->add_operation&>( + "list_rules", std::function&)>(list_rules)); + re->add_operation&, irods::callback>( + "exec_rule", + std::function&, irods::callback)>(exec_rule)); + re->add_operation( + "exec_rule_text", + std::function( + exec_rule_text)); + + re->add_operation( + "exec_rule_expression", + std::function( + exec_rule_expression)); + return re; } // plugin_factory - - - - diff --git a/libirods_rule_engine_plugin-indexing.cpp b/libirods_rule_engine_plugin-indexing.cpp index dd92cbc..7ad2fc9 100644 --- a/libirods_rule_engine_plugin-indexing.cpp +++ b/libirods_rule_engine_plugin-indexing.cpp @@ -1,1254 +1,1078 @@ +#include "indexing_utilities.hpp" +#include "utilities.hpp" -// =-=-=-=-=-=-=- -// irods includes -#define IRODS_QUERY_ENABLE_SERVER_SIDE_API -#include +#include +#include #include #include -#include #include +#include #include -#define IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API +#define IRODS_FILESYSTEM_ENABLE_SERVER_SIDE_API #include -#include +#define IRODS_QUERY_ENABLE_SERVER_SIDE_API +#include -#include "utilities.hpp" -#include "indexing_utilities.hpp" +#include +#include +#include +#include + +#include +#include #undef LIST -// =-=-=-=-=-=-=- -// stl includes +#include #include -#include -#include -#include -#include +#include #include +#include +#include #include -#include - -#include -#include +#include #include +#include -// =-=-=-=-=-=-=- -// boost includes -#include -#include -#include -#include +using namespace std::string_literals; -#include +extern l1desc_t L1desc[NUM_L1_DESC]; -#include +int _delayExec(const char* inActionCall, + const char* recoveryActionCall, + const char* delayCondition, + ruleExecInfo_t* rei); + +namespace +{ + // For handling of atomic metadata ops + + using metadata_tuple = std::tuple; + std::map> atomic_metadata_tuples{}; + std::string atomic_metadata_obj_path{}; + + // Other objects with visibility in this module only + + std::set indices_for_rm_coll; + bool collection_metadata_is_new = false; + std::unique_ptr config; + std::map> opened_objects; + + const char* rm_force_kw = "*"; // default value tested for in "*_post" PEPs + + //-- Collect the AVU indexing indicators from collections in an object's parent + //- chain or attached to subcollections (any level deep) of the given object. + //- This is a convenient and not too far-reaching superset of the actual set + //- of indicators that apply for the deletion of this and all sub objects. Note + //- the object is going away, so there is no harm in deleting mentions of it + //- and any subobjects from all indices so computed, even if some of them don't + //- refer to the object(s). + + auto get_indices_for_delete_by_query(rsComm_t& comm, const std::string& _object_name, const bool recurs) + -> std::set + { + using irods::indexing::parse_indexer_string; + + namespace fs = irods::experimental::filesystem; + namespace fsvr = irods::experimental::filesystem::server; + using fsp = fs::path; + + fsp node{_object_name}, up_node{_object_name}; + + std::set indices; + + auto get_indices_from_collection_AVUs = [&](const std::string& collection) { + irods::query q{&comm, + fmt::format("select META_COLL_ATTR_VALUE where " + "META_COLL_ATTR_NAME = '{}' and COLL_NAME = '{}'", + config->index, + collection)}; + for (const auto& row : q) { + std::string index_name = std::get<0>(parse_indexer_string(row[0])); + indices.insert(index_name); + } + }; + + while (!up_node.empty()) { + if (fsvr::is_collection(comm, up_node)) { + get_indices_from_collection_AVUs(up_node.string()); + } + up_node = up_node.parent_path(); + if (0 == up_node.compare(node.root_collection())) { + break; + } + } + + if (recurs) { + auto iter_end = fsvr::recursive_collection_iterator{}; + auto iter = fsvr::recursive_collection_iterator{comm, _object_name}; + for (; iter != iter_end; ++iter) { + if (fsvr::is_collection(comm, *iter)) { + get_indices_from_collection_AVUs(iter->path().string()); + } + } + } + + return indices; + } + + // -=-=-= Search for objPath, return L1 desc, Resource name + // - + // - get_index_and_resource(const dataObjInp_t* _inp) + // - + + std::tuple get_index_and_resource(const dataObjInp_t* _inp) + { + int l1_idx{}; + dataObjInfo_t* obj_info{}; + for (const auto& l1 : L1desc) { + if (FD_INUSE != l1.inuseFlag) { + continue; + } + if (!strcmp(l1.dataObjInp->objPath, _inp->objPath)) { + obj_info = l1.dataObjInfo; + l1_idx = &l1 - L1desc; + } + } + + if (nullptr == obj_info) { + THROW(SYS_INVALID_INPUT_PARAM, "no object found"); + } + + std::string resource_name; + irods::error err = + irods::get_resource_property(obj_info->rescId, irods::RESOURCE_NAME, resource_name); + if (!err.ok()) { + THROW(err.code(), err.result()); + } + + return std::make_tuple(l1_idx, resource_name); + } // get_object_path_and_resource -using namespace std::string_literals; +#define NULL_PTR_GUARD(x) ((x) == nullptr ? "" : (x)) -extern l1desc_t L1desc[NUM_L1_DESC]; + // - + // -=-=-= For the various PEP's , setup, schedule and/or initiate indexing policy + // - + // - apply_indexing_policy (const dataObjInp_t* _inp) + // - + + void apply_indexing_policy(const std::string& _rn, ruleExecInfo_t* _rei, std::list& _args) + { + try { + std::string object_path; + std::string source_resource; + // NOTE:: 3rd parameter is the target + if ("pep_api_data_obj_put_post" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + + auto obj_inp = boost::any_cast(*it); + object_path = obj_inp->objPath; + + const char* resc_hier = getValByKey(&obj_inp->condInput, RESC_HIER_STR_KW); + if (!resc_hier) { + THROW(SYS_INVALID_INPUT_PARAM, "resc hier is null"); + } + + irods::hierarchy_parser parser; + parser.set_string(resc_hier); + parser.last_resc(source_resource); + + irods::indexing::indexer idx{_rei, config->instance_name}; + idx.schedule_full_text_indexing_event(object_path, _rei->rsComm->clientUser.userName, source_resource); + + const char* metadata_included = getValByKey(&obj_inp->condInput, METADATA_INCLUDED_KW); + if (metadata_included) { + idx.schedule_metadata_indexing_event(object_path, + _rei->rsComm->clientUser.userName, + "a", + "v", + "u"); // "a","v","u" values are not significant; this is + // just a trigger to (re-)index all AVUs. Ref: #117 + } + } + else if ("pep_api_data_obj_repl_post" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + + auto obj_inp = boost::any_cast(*it); + object_path = obj_inp->objPath; + const char* resc_hier = getValByKey(&obj_inp->condInput, DEST_RESC_HIER_STR_KW); + if (!resc_hier) { + THROW(SYS_INVALID_INPUT_PARAM, "resc hier is null"); + } + + irods::hierarchy_parser parser; + parser.set_string(resc_hier); + parser.last_resc(source_resource); + + irods::indexing::indexer idx{_rei, config->instance_name}; + idx.schedule_full_text_indexing_event(object_path, _rei->rsComm->clientUser.userName, source_resource); + } + else if ("pep_api_data_obj_open_post" == _rn || "pep_api_data_obj_create_post" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + + auto obj_inp = boost::any_cast(*it); + if (obj_inp->openFlags & O_WRONLY || obj_inp->openFlags & O_RDWR) { + int l1_idx{}; + std::string resource_name; + try { + std::tie(l1_idx, resource_name) = get_index_and_resource(obj_inp); + opened_objects[l1_idx] = std::tie(obj_inp->objPath, resource_name); + } + catch (const irods::exception& _e) { + rodsLog(LOG_ERROR, "get_index_and_resource failed for [%s]", obj_inp->objPath); + } + } + } + else if ("pep_api_data_obj_close_post" == _rn) { + //TODO :: only for create/write events + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + + const auto opened_inp = boost::any_cast(*it); + const auto l1_idx = opened_inp->l1descInx; + if (opened_objects.find(l1_idx) != opened_objects.end()) { + std::string object_path, resource_name; + std::tie(object_path, resource_name) = opened_objects[l1_idx]; + irods::indexing::indexer idx{_rei, config->instance_name}; + idx.schedule_full_text_indexing_event( + object_path, _rei->rsComm->clientUser.userName, resource_name); + } + } + else if ("pep_api_mod_avu_metadata_pre" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + + const auto avu_inp = boost::any_cast(*it); + const std::string attribute{avu_inp->arg3}; + if (config->index != attribute) { + return; + } + + const std::string operation{avu_inp->arg0}; + const std::string type{avu_inp->arg1}; + const std::string object_path{avu_inp->arg2}; + const std::string add{"add"}; + const std::string set{"set"}; + const std::string collection{"-C"}; + + irods::indexing::indexer idx{_rei, config->instance_name}; + if (operation == set || operation == add) { + if (type == collection) { + // was the added tag an indexing indicator + if (config->index == attribute) { + // verify that this is not new metadata with a query and set a flag + if (!avu_inp->arg3) { + THROW(SYS_INVALID_INPUT_PARAM, "empty metadata attribute"); + } + if (!avu_inp->arg4) { + THROW(SYS_INVALID_INPUT_PARAM, "empty metadata value"); + } + collection_metadata_is_new = !idx.metadata_exists_on_collection( + object_path, avu_inp->arg3, avu_inp->arg4, NULL_PTR_GUARD(avu_inp->arg5)); + } + } + } + } + else if ("pep_api_mod_avu_metadata_post" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + + const auto avu_inp = boost::any_cast(*it); + const std::string operation{avu_inp->arg0}; + const std::string type{avu_inp->arg1}; + const std::string logical_path{avu_inp->arg2}; + + if (!avu_inp->arg3) { + THROW(SYS_INVALID_INPUT_PARAM, "empty metadata attribute"); + } + if (!avu_inp->arg4) { + THROW(SYS_INVALID_INPUT_PARAM, "empty metadata value"); + } + const std::string attribute{avu_inp->arg3}; + const std::string value{avu_inp->arg4}; + const std::string units{NULL_PTR_GUARD(avu_inp->arg5)}; + + if (config->flag == attribute) { + return; + } + + const std::string add{"add"}; + const std::string set{"set"}; + const std::string rm{"rm"}; + const std::string rmw{ + "rmw"}; // yet to be implemented; AVU args are "like" patterns to be used in a genquery + const std::string collection{"-C"}; + const std::string data_object{"-d"}; + + irods::indexing::indexer idx{_rei, config->instance_name}; + if (operation == rm) { + // removed index metadata from collection + if (type == collection) { + // was the removed tag an indexing indicator + if (config->index == attribute) { + // schedule a possible purge of all indexed data in collection + idx.schedule_collection_operation(irods::indexing::operation_type::purge, + logical_path, + _rei->rsComm->clientUser.userName, + value, + units); + } + } + // removed a single indexed AVU on an object or collection + if (type == data_object || (type == collection && config->index != attribute)) { + // schedule an AVU purge + idx.schedule_metadata_purge_event( + logical_path, _rei->rsComm->clientUser.userName, attribute, value, units); + } + } + else if (operation == set || operation == add) { + if (type == collection) { + // was the added tag an indexing indicator + if (config->index == attribute) { + // check the verify flag + if (collection_metadata_is_new) { + idx.schedule_collection_operation(irods::indexing::operation_type::index, + logical_path, + _rei->rsComm->clientUser.userName, + value, + units); + } + } + } + if (type == data_object || (type == collection && config->index != attribute)) { + idx.schedule_metadata_indexing_event( + logical_path, _rei->rsComm->clientUser.userName, attribute, value, units); + } + } + } + else if ("pep_api_data_obj_unlink_pre" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + const auto obj_inp = boost::any_cast(*it); + if (auto* p = getValByKey(&obj_inp->condInput, FORCE_FLAG_KW); p != 0) { + rm_force_kw = p; + } + indices_for_rm_coll = get_indices_for_delete_by_query(*_rei->rsComm, obj_inp->objPath, false); + } + else if ("pep_api_data_obj_unlink_post" == _rn) { + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + const auto obj_inp = boost::any_cast(*it); + if ('*' != rm_force_kw[0]) { /* there was a force keyword */ + irods::indexing::indexer idx{_rei, config->instance_name}; + nlohmann::json recurseInfo{{"is_collection", false}}; + recurseInfo["indices"] = indices_for_rm_coll; + idx.schedule_metadata_purge_for_recursive_rm_object(obj_inp->objPath, recurseInfo); + } + } + else if ("pep_api_rm_coll_pre" == _rn) { + /** + * argument spec : + * [...?] + * + * before a collection is deleted. record whether FORCE_FLAG_KW is used. + * + **/ + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + CollInp* obj_inp = nullptr; + obj_inp = boost::any_cast(*it); + if (auto* p = getValByKey(&obj_inp->condInput, FORCE_FLAG_KW); p != 0) { + rm_force_kw = p; + } + indices_for_rm_coll = get_indices_for_delete_by_query(*_rei->rsComm, obj_inp->collName, true); + } + else if ("pep_api_rm_coll_post" == _rn) { + /** + * argument spec : + * [...?] + * + * collection has been deleted successfully. If FORCE_FLAG_KW was used, then purge the + * from the relevant indexes collection recursively + */ + + if ('*' != rm_force_kw[0]) { /* there was a force keyword */ + auto it = _args.begin(); + std::advance(it, 2); + if (_args.end() == it) { + THROW(SYS_INVALID_INPUT_PARAM, "invalid number of arguments"); + } + CollInp* obj_inp = nullptr; + obj_inp = boost::any_cast(*it); + irods::indexing::indexer idx{_rei, config->instance_name}; + nlohmann::json recurseInfo = {{"is_collection", true}}; + recurseInfo["indices"] = indices_for_rm_coll; + idx.schedule_metadata_purge_for_recursive_rm_object(obj_inp->collName, recurseInfo); + } + } + else if (_rn == "pep_api_atomic_apply_metadata_operations_pre" || + _rn == "pep_api_atomic_apply_metadata_operations_post") + { + using nlohmann::json; + auto pos = _rn.rfind("_p"); + auto when = _rn.substr(pos + 1); // "pre" or "post" + auto it = _args.begin(); + std::advance(it, 2); + + auto request = boost::any_cast(*it); + std::string requ_str{(const char*) request->buf, unsigned(request->len)}; + const auto& requ_json = json::parse(requ_str); + + std::string obj_path = requ_json["entity_name"]; // logical path + std::string obj_type = requ_json["entity_type"]; // "data_object" or "collection" + if ("pre" == when) { + atomic_metadata_obj_path = obj_path; + } + else { + if (atomic_metadata_obj_path != obj_path) { + THROW(SYS_INVALID_INPUT_PARAM, fmt::format("invalid object path for {} operation", _rn)); + } + } + auto& map = atomic_metadata_tuples[when]; + + namespace fs = irods::experimental::filesystem; + std::string dobj_name{fs::path{obj_path}.object_name()}; + std::string dobj_parent{fs::path{obj_path}.parent_path()}; + + auto query_str = fmt::format( + "SELECT META_{0}_ATTR_NAME, META_{0}_ATTR_VALUE, META_{0}_ATTR_UNITS where {0}_NAME = '{1}'", + (obj_type == "collection" ? "COLL" : "DATA"), + (obj_type == "collection" ? obj_path : dobj_name)); + if (obj_type != "collection") { + query_str += fmt::format(" and COLL_NAME = '{}'", dobj_parent); + } + + irods::query qobj{_rei->rsComm, query_str}; + + for (const auto& row : qobj) { + map.insert({row[0], row[1], row[2]}); + } + if (when == "post") { + std::vector avus_added_or_removed; + + const auto& pre_map = atomic_metadata_tuples["pre"]; + set_symmetric_difference(pre_map.begin(), + pre_map.end(), + map.cbegin(), + map.cend(), + std::back_inserter(avus_added_or_removed)); + + for (const auto& [attribute, value, units] : avus_added_or_removed) { + if (attribute != config->index) { + irods::indexing::indexer idx{_rei, config->instance_name}; + idx.schedule_metadata_indexing_event( + obj_path, _rei->rsComm->clientUser.userName, attribute, value, units); + break; // only need one event to re-index all AVU's + } + } + } + } + } + catch (const boost::bad_any_cast& _e) { + THROW(INVALID_ANY_CAST, boost::str(boost::format("function [%s] rule name [%s]") % __FUNCTION__ % _rn)); + } + } // apply_indexing_policy + + // -=-=-=-= Invoke policy on object. uses + // - + // - apply_object_policy (root, obj_path, src_resc, indexer, index_name, index_type) + // - + // - (1) Composes a policy name from (root , indexer) + // - (2) Invokes the policy by that name upon (obj_path, src_resc, index_name, index_type) + // - + + void apply_object_policy(ruleExecInfo_t* _rei, + const std::string& _policy_root, + const std::string& _object_path, + const std::string& _source_resource, + const std::string& _indexer, + const std::string& _index_name, + const std::string& _index_type) + { + const std::string policy_name{irods::indexing::policy::compose_policy_name(_policy_root, _indexer)}; + + std::list args; + args.push_back(boost::any(_object_path)); + args.push_back(boost::any(_source_resource)); + args.push_back(boost::any(_index_name)); + args.push_back(boost::any(_index_type)); + irods::indexing::invoke_policy(_rei, policy_name, args); + } // apply_object_policy + + void apply_specific_policy(ruleExecInfo_t* _rei, + const std::string& _policy_name, // request specific policy by name + const std::string& _object_path, + const std::string& _source_resource, + const std::string& _indexer, + const std::string& _index_name, + const std::string& _index_type) + { + std::list args; + args.push_back(boost::any(_object_path)); + args.push_back(boost::any(_source_resource)); + args.push_back(boost::any(_index_name)); + args.push_back(boost::any(_index_type)); + irods::indexing::invoke_policy(_rei, _policy_name, args); + } // apply_specific_policy + + /***********/ + + std::string to_lowercase(const std::string& src) + { + std::string dst; + std::transform(src.begin(), src.end(), std::back_inserter(dst), [](wchar_t w) { return tolower(w); }); + return dst; + } + + auto get_default_mime_type(const std::string& input) -> std::string + { + const static std::map default_mime_types{ + {".aac", "audio/aac"}, + {".abw", "application/x-abiword"}, + {".arc", "application/x-freearc"}, + {".avi", "video/x-msvideo"}, + {".azw", "application/vnd.amazon.ebook"}, + {".bin", "application/octet-stream"}, + {".bmp", "image/bmp"}, + {".bz", "application/x-bzip"}, + {".bz2", "application/x-bzip2"}, + {".cda", "application/x-cdf"}, + {".csh", "application/x-csh"}, + {".css", "text/css"}, + {".csv", "text/csv"}, + {".doc", "application/msword"}, + {".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"}, + {".eot", "application/vnd.ms-fontobject"}, + {".epub", "application/epub+zip"}, + {".gz", "application/gzip"}, + {".gif", "image/gif"}, + {".htm", "text/html"}, + {".html", "text/html"}, + {".ico", "image/vnd.microsoft.icon"}, + {".ics", "text/calendar"}, + {".jar", "application/java-archive"}, + {".jpeg", "image/jpeg"}, + {".jpg", "image/jpeg"}, + {".js", "text/javascript"}, + {".json", "application/json"}, + {".jsonld", "application/ld+json"}, + {".mid", "audio/midi audio/x-midi"}, + {".midi", "audio/midi audio/x-midi"}, + {".mjs", "text/javascript"}, + {".mp3", "audio/mpeg"}, + {".mp4", "video/mp4"}, + {".mpeg", "video/mpeg"}, + {".mpkg", "application/vnd.apple.installer+xml"}, + {".odp", "application/vnd.oasis.opendocument.presentation"}, + {".ods", "application/vnd.oasis.opendocument.spreadsheet"}, + {".odt", "application/vnd.oasis.opendocument.text"}, + {".oga", "audio/ogg"}, + {".ogv", "video/ogg"}, + {".ogx", "application/ogg"}, + {".opus", "audio/opus"}, + {".otf", "font/otf"}, + {".png", "image/png"}, + {".pdf", "application/pdf"}, + {".php", "application/x-httpd-php"}, + {".ppt", "application/vnd.ms-powerpoint"}, + {".pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"}, + {".rar", "application/vnd.rar"}, + {".rtf", "application/rtf"}, + {".sh", "application/x-sh"}, + {".svg", "image/svg+xml"}, + {".swf", "application/x-shockwave-flash"}, + {".tar", "application/x-tar"}, + {".tif", "image/tiff"}, + {".tiff", "image/tiff"}, + {".ts", "video/mp2t"}, + {".ttf", "font/ttf"}, + {".txt", "text/plain"}, + {".vsd", "application/vnd.visio"}, + {".wav", "audio/wav"}, + {".weba", "audio/webm"}, + {".webm", "video/webm"}, + {".webp", "image/webp"}, + {".woff", "font/woff"}, + {".woff2", "font/woff2"}, + {".xhtml", "application/xhtml+xml"}, + {".xls", "application/vnd.ms-excel"}, + {".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}, + {".xml", "application/xml"}, // if not readable from casual users (RFC 3023, section 3)" else "text/xml" + {".xul", "application/vnd.mozilla.xul+xml"}, + {".zip", "application/zip"}, + {".3gp", "video/3gpp"}, // audio/3gpp if it doesn't contain video, + {".3g2", "video/3gpp2"}, // audio/3gpp2 if it doesn't contain video, + {".7z", "application/x-7z-compressed"}, + }; + std::string retvalue{}; + if (auto offs = input.rfind("."); offs != std::string::npos) { + const std::string lower_case_ext = to_lowercase(input.substr(offs)); + try { + retvalue = default_mime_types.at(lower_case_ext); + } + catch (const std::out_of_range& e) { + irods::log( + LOG_DEBUG, + fmt::format("In {}, function {}: Unknown extension [{}]", __FILE__, __func__, lower_case_ext)); + } + } + return retvalue.size() ? retvalue : "application/octet-stream"; + } + + auto get_system_metadata(ruleExecInfo_t* _rei, const std::string& _obj_path) -> nlohmann::json + { + using nlohmann::json; + const boost::filesystem::path p{_obj_path}; + const std::string parent_name = p.parent_path().string(); + const std::string name = p.filename().string(); + namespace fs = irods::experimental::filesystem; + namespace fsvr = irods::experimental::filesystem::server; + auto irods_path = fs::path{_obj_path}; + const auto s = fsvr::status(*_rei->rsComm, irods_path); + std::string query_str; + + json obj; + + obj["absolutePath"] = _obj_path; + + bool is_collection = false; + if (fsvr::is_data_object(s)) { + query_str = fmt::format( + "SELECT DATA_ID , DATA_MODIFY_TIME, DATA_ZONE_NAME, COLL_NAME, DATA_SIZE where DATA_NAME = '{0}'" + " and COLL_NAME = '{1}' ", + name, + parent_name); + irods::query qobj{_rei->rsComm, query_str, 1}; + for (const auto& i : qobj) { + obj["lastModifiedDate"] = std::stol(i[1]); // epoch seconds + obj["zoneName"] = i[2]; + obj["parentPath"] = i[3]; + obj["dataSize"] = std::stol(i[4]); + obj["isFile"] = true; + break; + } + } + else if (fsvr::is_collection(s)) { + is_collection = true; + query_str = fmt::format( + "SELECT COLL_ID , COLL_MODIFY_TIME, COLL_ZONE_NAME, COLL_PARENT_NAME where COLL_NAME = '{0}'" + " and COLL_PARENT_NAME = '{1}' ", + _obj_path, + parent_name); + irods::query qobj{_rei->rsComm, query_str, 1}; + for (const auto& i : qobj) { + obj["lastModifiedDate"] = std::stol(i[1]); // epoch seconds + obj["zoneName"] = i[2]; + obj["parentPath"] = i[3]; + obj["dataSize"] = 0L; + obj["isFile"] = false; + break; + } + } + auto fileName = obj["fileName"] = irods_path.object_name(); + obj["mimeType"] = (is_collection ? "" : get_default_mime_type(fileName)); + return obj; + } // get_system_metadata + + void apply_metadata_policy(ruleExecInfo_t* _rei, + const std::string& _policy_root, + const std::string& _object_path, + const std::string& _indexer, + const std::string& _index_name + // -- now used only to distinguish between object and avu purges + , + const std::string& _attribute, + const std::string& _value, + const std::string& _units) + { + const std::string policy_name{irods::indexing::policy::compose_policy_name(_policy_root, _indexer)}; + + std::list args; + args.push_back(boost::any(_object_path)); + args.push_back( + boost::any(_attribute)); // was attr // -- not explicit anymore with new schema as AVU's are cataloged + args.push_back(boost::any(_value)); // was value // within the indexed entry for the object itself + args.push_back( + boost::any(_units)); // was units // As this is now a no-op, we should remove these arguments. + args.push_back(boost::any(_index_name)); + + args.push_back(boost::any(get_system_metadata(_rei, _object_path).dump())); + + irods::indexing::invoke_policy(_rei, policy_name, args); + } // apply_metadata_policy + + irods::error start(irods::default_re_ctx&, const std::string& _instance_name) + { + RuleExistsHelper::Instance()->registerRuleRegex("pep_api_.*"); + + try { + config = std::make_unique(_instance_name); + } + catch (const irods::exception& e) { + return e; + } + + rodsLog(LOG_DEBUG, "value of minimum_delay_time: %d", config->minimum_delay_time); + rodsLog(LOG_DEBUG, "value of maximum_delay_time: %d", config->maximum_delay_time); + rodsLog(LOG_DEBUG, "value of job_limit_per_collection_indexing_operation: %d", config->job_limit); + + return SUCCESS(); + } // start + + irods::error stop(irods::default_re_ctx&, const std::string&) + { + return SUCCESS(); + } // stop + + irods::error rule_exists(irods::default_re_ctx&, const std::string& _rn, bool& _ret) + { + const std::set rules{ + "pep_api_atomic_apply_metadata_operations_pre", + "pep_api_atomic_apply_metadata_operations_post", + "pep_api_data_obj_open_post", + "pep_api_data_obj_create_post", + "pep_api_data_obj_repl_post", + "pep_api_data_obj_unlink_pre", + "pep_api_data_obj_unlink_post", + "pep_api_mod_avu_metadata_pre", + "pep_api_mod_avu_metadata_post", + "pep_api_data_obj_close_post", + "pep_api_data_obj_put_post", + "pep_api_phy_path_reg_post", + "pep_api_rm_coll_pre", + "pep_api_rm_coll_post", + }; + _ret = rules.find(_rn) != rules.end(); + + return SUCCESS(); + } // rule_exists + + irods::error list_rules(irods::default_re_ctx&, std::vector&) + { + return SUCCESS(); + } // list_rules + + irods::error exec_rule(irods::default_re_ctx&, + const std::string& _rn, + std::list& _args, + irods::callback _eff_hdlr) + { + ruleExecInfo_t* rei{}; + const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); + if (!err.ok()) { + return err; + } + try { + apply_indexing_policy(_rn, rei, _args); + } + catch (const std::invalid_argument& _e) { + irods::indexing::exception_to_rerror(SYS_NOT_SUPPORTED, _e.what(), rei->rsComm->rError); + return ERROR(SYS_NOT_SUPPORTED, _e.what()); + } + catch (const std::domain_error& _e) { + irods::indexing::exception_to_rerror(INVALID_ANY_CAST, _e.what(), rei->rsComm->rError); + return ERROR(SYS_NOT_SUPPORTED, _e.what()); + } + catch (const irods::exception& _e) { + irods::indexing::exception_to_rerror(_e, rei->rsComm->rError); + return irods::error(_e); + } + + return CODE(RULE_ENGINE_CONTINUE); + + } // exec_rule + + irods::error exec_rule_text(irods::default_re_ctx&, + const std::string& _rule_text, + msParamArray_t* _ms_params, + const std::string& _out_desc, + irods::callback _eff_hdlr) + { + using json = nlohmann::json; + + try { + // skip the first line: @external + std::string rule_text{_rule_text}; + if (_rule_text.find("@external") != std::string::npos) { + rule_text = _rule_text.substr(10); + } + const auto rule_obj = json::parse(rule_text); + const std::string& rule_engine_instance_name = rule_obj["rule-engine-instance-name"]; + // if the rule text does not have our instance name, fail + if (config->instance_name != rule_engine_instance_name) { + return ERROR(SYS_NOT_SUPPORTED, "instance name not found"); + } +#if 0 + // catalog / index drift correction + if(irods::indexing::schedule::indexing == + rule_obj["rule-engine-operation"]) { + ruleExecInfo_t* rei{}; + const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); + if(!err.ok()) { + return err; + } + + const std::string& params = rule_obj["delay-parameters"]; + + json delay_obj; + delay_obj["rule-engine-operation"] = irods::indexing::policy::indexing; + + irods::indexing::indexer idx{rei, config->instance_name}; + idx.schedule_indexing_policy( + delay_obj.dump(), + params); + } + else +#endif + { + return ERROR(SYS_NOT_SUPPORTED, "supported rule name not found"); + } + } + catch (const std::invalid_argument& _e) { + std::string msg{"Rule text is not valid JSON -- "}; + msg += _e.what(); + return ERROR(SYS_NOT_SUPPORTED, msg); + } + catch (const std::domain_error& _e) { + std::string msg{"Rule text is not valid JSON -- "}; + msg += _e.what(); + return ERROR(SYS_NOT_SUPPORTED, msg); + } + catch (const irods::exception& _e) { + return ERROR(_e.code(), _e.what()); + } + + return SUCCESS(); + } // exec_rule_text + + irods::error exec_rule_expression(irods::default_re_ctx&, + const std::string& _rule_text, + msParamArray_t* _ms_params, + irods::callback _eff_hdlr) + { + using json = nlohmann::json; + ruleExecInfo_t* rei{}; + const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); + if (!err.ok()) { + return err; + } + + try { + const auto rule_obj = json::parse(_rule_text); + if (irods::indexing::policy::object::index == rule_obj["rule-engine-operation"]) { + try { + // proxy for provided user name + const std::string& user_name = rule_obj["user-name"]; + rstrcpy(rei->rsComm->clientUser.userName, user_name.c_str(), NAME_LEN); + + // - implement (full-text?) indexing on an individual object + // - as a delayed task. + // - + apply_object_policy(rei, + irods::indexing::policy::object::index, + rule_obj["object-path"], + rule_obj["source-resource"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["index-type"]); + } + catch (const irods::exception& _e) { + printErrorStack(&rei->rsComm->rError); + return ERROR(_e.code(), _e.what()); + } + } + else if (irods::indexing::policy::object::purge == rule_obj["rule-engine-operation"]) { + try { + // proxy for provided user name + const std::string& user_name = rule_obj["user-name"]; + rstrcpy(rei->rsComm->clientUser.userName, user_name.c_str(), NAME_LEN); + + // - implement index purge on an individual object + // - as a delayed task. + // - + apply_object_policy(rei, + irods::indexing::policy::object::purge, + rule_obj["object-path"], + rule_obj["source-resource"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["index-type"]); + } + catch (const irods::exception& _e) { + printErrorStack(&rei->rsComm->rError); + return ERROR(_e.code(), _e.what()); + } + } + else if (irods::indexing::policy::collection::index == rule_obj["rule-engine-operation"]) { + // - launch delayed task to handle indexing events under a collection + // - ( example : a new indexing AVU was placed on the collection ) + // - + irods::indexing::indexer idx{rei, config->instance_name}; + idx.schedule_policy_events_for_collection(irods::indexing::operation_type::index, + rule_obj["collection-name"], + rule_obj["user-name"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["index-type"]); + } + else if (irods::indexing::policy::collection::purge == rule_obj["rule-engine-operation"]) { + // - launch delayed task to handle indexing events under a collection + // - ( example : an indexing AVU was removed from the collection ) + // - + irods::indexing::indexer idx{rei, config->instance_name}; + idx.schedule_policy_events_for_collection(irods::indexing::operation_type::purge, + rule_obj["collection-name"], + rule_obj["user-name"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["index-type"]); + } + else if (irods::indexing::policy::metadata::index == rule_obj["rule-engine-operation"]) { + try { + // proxy for provided user name + const std::string& user_name = rule_obj["user-name"]; + rstrcpy(rei->rsComm->clientUser.userName, user_name.c_str(), NAME_LEN); + + apply_metadata_policy(rei, + irods::indexing::policy::metadata::index, + rule_obj["object-path"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["attribute"], + rule_obj["value"], + rule_obj["units"]); + } + catch (const irods::exception& _e) { + printErrorStack(&rei->rsComm->rError); + return ERROR(_e.code(), _e.what()); + } + } + else if (irods::indexing::policy::metadata::purge == rule_obj["rule-engine-operation"]) { + try { + // proxy for provided user name + const std::string& user_name = rule_obj["user-name"]; + rstrcpy(rei->rsComm->clientUser.userName, user_name.c_str(), NAME_LEN); + + apply_metadata_policy(rei, + irods::indexing::policy::metadata::purge, + rule_obj["object-path"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["attribute"], + rule_obj["value"], + rule_obj["units"]); + } + catch (const irods::exception& _e) { + printErrorStack(&rei->rsComm->rError); + return ERROR(_e.code(), _e.what()); + } + } + else if ("irods_policy_recursive_rm_object_by_path" == rule_obj["rule-engine-operation"]) { + const std::string& user_name = rule_obj["user-name"]; + rstrcpy(rei->rsComm->clientUser.userName, user_name.c_str(), NAME_LEN); + + apply_specific_policy(rei, + "irods_policy_recursive_rm_object_by_path", + rule_obj["object-path"], + rule_obj["source-resource"], + rule_obj["indexer"], + rule_obj["index-name"], + rule_obj["index-type"]); + } + else { + printErrorStack(&rei->rsComm->rError); + return ERROR(SYS_NOT_SUPPORTED, "supported rule name not found"); + } + } + catch (const json::parse_error& _e) { + rodsLog(LOG_ERROR, + "Exception (%s). Could not parse JSON rule text @ FILE %s LINE %d FUNCTION %s ", + _e.what(), + __FILE__, + __LINE__, + __FUNCTION__); + return CODE(RULE_ENGINE_CONTINUE); + } + catch (const std::invalid_argument& _e) { + return ERROR(SYS_NOT_SUPPORTED, _e.what()); + } + catch (const std::domain_error& _e) { + return ERROR(SYS_NOT_SUPPORTED, _e.what()); + } + catch (const irods::exception& _e) { + return ERROR(_e.code(), _e.what()); + } + + return SUCCESS(); + } // exec_rule_expression +} // namespace +extern "C" irods::pluggable_rule_engine* plugin_factory(const std::string& _inst_name, + const std::string& _context) +{ + irods::pluggable_rule_engine* re = + new irods::pluggable_rule_engine(_inst_name, _context); -int _delayExec( - const char *inActionCall, - const char *recoveryActionCall, - const char *delayCondition, - ruleExecInfo_t *rei ); - -namespace { - - // For handling of atomic metadata ops - - using metadata_tuple = std::tuple; - std::map > atomic_metadata_tuples {}; - std::string atomic_metadata_obj_path {}; - - // Other objects with visibility in this module only - - std::set indices_for_rm_coll; - bool collection_metadata_is_new = false; - std::unique_ptr config; - std::map> opened_objects; - - const char* rm_force_kw = "*"; // default value tested for in "*_post" PEPs - - //-- Collect the AVU indexing indicators from collections in an object's parent - //- chain or attached to subcollections (any level deep) of the given object. - //- This is a convenient and not too far-reaching superset of the actual set - //- of indicators that apply for the deletion of this and all sub objects. Note - //- the object is going away, so there is no harm in deleting mentions of it - //- and any subobjects from all indices so computed, even if some of them don't - //- refer to the object(s). - - auto get_indices_for_delete_by_query (rsComm_t& comm, const std::string &_object_name, const bool recurs) -> std::set - { - using irods::indexing::parse_indexer_string; - - namespace fs = irods::experimental::filesystem; - namespace fsvr = irods::experimental::filesystem::server; - using fsp = fs::path; - - fsp node {_object_name}, up_node {_object_name}; - - std::set indices; - - auto get_indices_from_collection_AVUs = [&](const std::string& collection){ - irods::query q { &comm, fmt::format("select META_COLL_ATTR_VALUE where " - "META_COLL_ATTR_NAME = '{}' and COLL_NAME = '{}'", - config->index,collection) }; - for (const auto &row : q) { - std::string index_name = std::get<0>(parse_indexer_string(row[0])); - indices.insert( index_name ); - } - }; - - while (!up_node.empty()) { - if (fsvr::is_collection(comm,up_node)) { get_indices_from_collection_AVUs(up_node.string()); } - up_node = up_node.parent_path(); - if (0 == up_node.compare(node.root_collection())) { break; } - } - - if (recurs) { - auto iter_end = fsvr::recursive_collection_iterator{}; - auto iter = fsvr::recursive_collection_iterator{comm, _object_name}; - for (; iter != iter_end ; ++iter) { - if (fsvr::is_collection(comm,*iter)) { get_indices_from_collection_AVUs(iter->path().string()); } - } - } - - return indices; - } - - // -=-=-= Search for objPath, return L1 desc, Resource name - // - - // - get_index_and_resource(const dataObjInp_t* _inp) - // - - - std::tuple - get_index_and_resource(const dataObjInp_t* _inp) { - int l1_idx{}; - dataObjInfo_t* obj_info{}; - for(const auto& l1 : L1desc) { - if(FD_INUSE != l1.inuseFlag) { - continue; - } - if(!strcmp(l1.dataObjInp->objPath, _inp->objPath)) { - obj_info = l1.dataObjInfo; - l1_idx = &l1 - L1desc; - } - } - - if(nullptr == obj_info) { - THROW( - SYS_INVALID_INPUT_PARAM, - "no object found"); - } - - std::string resource_name; - irods::error err = irods::get_resource_property( - obj_info->rescId, - irods::RESOURCE_NAME, - resource_name); - if(!err.ok()) { - THROW(err.code(), err.result()); - } - - return std::make_tuple(l1_idx, resource_name); - } // get_object_path_and_resource + re->add_operation( + "start", std::function(start)); -#define NULL_PTR_GUARD(x) ((x) == nullptr ? "" : (x)) + re->add_operation( + "stop", std::function(stop)); - // - - // -=-=-= For the various PEP's , setup, schedule and/or initiate indexing policy - // - - // - apply_indexing_policy (const dataObjInp_t* _inp) - // - - - void apply_indexing_policy( - const std::string & _rn, - ruleExecInfo_t* _rei, - std::list& _args) { - try { - std::string object_path; - std::string source_resource; - // NOTE:: 3rd parameter is the target - if("pep_api_data_obj_put_post" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - - auto obj_inp = boost::any_cast(*it); - object_path = obj_inp->objPath; - - const char* resc_hier = getValByKey( - &obj_inp->condInput, - RESC_HIER_STR_KW); - if(!resc_hier) { - THROW(SYS_INVALID_INPUT_PARAM, "resc hier is null"); - } - - irods::hierarchy_parser parser; - parser.set_string(resc_hier); - parser.last_resc(source_resource); - - irods::indexing::indexer idx{_rei, config->instance_name_}; - idx.schedule_full_text_indexing_event( - object_path, - _rei->rsComm->clientUser.userName, - source_resource); - - const char* metadata_included = getValByKey(&obj_inp->condInput, METADATA_INCLUDED_KW); - if (metadata_included) - { - idx.schedule_metadata_indexing_event( - object_path, - _rei->rsComm->clientUser.userName, "a", "v", "u"); // "a","v","u" values are not significant; this is - // just a trigger to (re-)index all AVUs. Ref: #117 - } - } - else if("pep_api_data_obj_repl_post" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - - auto obj_inp = boost::any_cast(*it); - object_path = obj_inp->objPath; - const char* resc_hier = getValByKey( - &obj_inp->condInput, - DEST_RESC_HIER_STR_KW); - if(!resc_hier) { - THROW(SYS_INVALID_INPUT_PARAM, "resc hier is null"); - } - - irods::hierarchy_parser parser; - parser.set_string(resc_hier); - parser.last_resc(source_resource); - - irods::indexing::indexer idx{_rei, config->instance_name_}; - idx.schedule_full_text_indexing_event( - object_path, - _rei->rsComm->clientUser.userName, - source_resource); - } - else if("pep_api_data_obj_open_post" == _rn || - "pep_api_data_obj_create_post" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - - auto obj_inp = boost::any_cast(*it); - if(obj_inp->openFlags & O_WRONLY || obj_inp->openFlags & O_RDWR) { - int l1_idx{}; - std::string resource_name; - try { - std::tie(l1_idx, resource_name) = get_index_and_resource(obj_inp); - opened_objects[l1_idx] = std::tie(obj_inp->objPath, resource_name); - } - catch(const irods::exception& _e) { - rodsLog( - LOG_ERROR, - "get_index_and_resource failed for [%s]", - obj_inp->objPath); - } - } - } - else if("pep_api_data_obj_close_post" == _rn) { - //TODO :: only for create/write events - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - - const auto opened_inp = boost::any_cast(*it); - const auto l1_idx = opened_inp->l1descInx; - if(opened_objects.find(l1_idx) != opened_objects.end()) { - std::string object_path, resource_name; - std::tie(object_path, resource_name) = opened_objects[l1_idx]; - irods::indexing::indexer idx{_rei, config->instance_name_}; - idx.schedule_full_text_indexing_event( - object_path, - _rei->rsComm->clientUser.userName, - resource_name); - } - } - else if("pep_api_mod_avu_metadata_pre" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - - const auto avu_inp = boost::any_cast(*it); - const std::string attribute{avu_inp->arg3}; - if(config->index != attribute) { - return; - } - - const std::string operation{avu_inp->arg0}; - const std::string type{avu_inp->arg1}; - const std::string object_path{avu_inp->arg2}; - const std::string add{"add"}; - const std::string set{"set"}; - const std::string collection{"-C"}; - - irods::indexing::indexer idx{_rei, config->instance_name_}; - if(operation == set || operation == add) { - if(type == collection) { - // was the added tag an indexing indicator - if(config->index == attribute) { - // verify that this is not new metadata with a query and set a flag - if (!avu_inp->arg3) { THROW( SYS_INVALID_INPUT_PARAM, "empty metadata attribute" ); } - if (!avu_inp->arg4) { THROW( SYS_INVALID_INPUT_PARAM, "empty metadata value" ); } - collection_metadata_is_new = !idx.metadata_exists_on_collection( - object_path, - avu_inp->arg3, - avu_inp->arg4, - NULL_PTR_GUARD(avu_inp->arg5)); - } - } - } - } - else if("pep_api_mod_avu_metadata_post" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - - const auto avu_inp = boost::any_cast(*it); - const std::string operation{avu_inp->arg0}; - const std::string type{avu_inp->arg1}; - const std::string logical_path{avu_inp->arg2}; - - if (!avu_inp->arg3) { THROW( SYS_INVALID_INPUT_PARAM, "empty metadata attribute" ); } - if (!avu_inp->arg4) { THROW( SYS_INVALID_INPUT_PARAM, "empty metadata value" ); } - const std::string attribute{ avu_inp->arg3 }; - const std::string value{ avu_inp->arg4 }; - const std::string units{ NULL_PTR_GUARD(avu_inp->arg5) }; - - if(config->flag == attribute) { return; } - - const std::string add{"add"}; - const std::string set{"set"}; - const std::string rm{"rm"}; - const std::string rmw{"rmw"}; // yet to be implemented; AVU args are "like" patterns to be used in a genquery - const std::string collection{"-C"}; - const std::string data_object{"-d"}; - - irods::indexing::indexer idx{_rei, config->instance_name_}; - if(operation == rm) { - // removed index metadata from collection - if(type == collection) { - // was the removed tag an indexing indicator - if(config->index == attribute) { - // schedule a possible purge of all indexed data in collection - idx.schedule_collection_operation( - irods::indexing::operation_type::purge, - logical_path, - _rei->rsComm->clientUser.userName, - value, - units); - } - } - // removed a single indexed AVU on an object or collection - if(type == data_object || - (type == collection && config->index != attribute)) { - // schedule an AVU purge - idx.schedule_metadata_purge_event( - logical_path, - _rei->rsComm->clientUser.userName, - attribute, - value, - units); - } - } - else if(operation == set || operation == add) { - if(type == collection) { - // was the added tag an indexing indicator - if(config->index == attribute) { - // check the verify flag - if(collection_metadata_is_new) { - idx.schedule_collection_operation( - irods::indexing::operation_type::index, - logical_path, - _rei->rsComm->clientUser.userName, - value, - units); - } - } - } - if(type == data_object || - (type == collection && config->index != attribute)) { - idx.schedule_metadata_indexing_event( - logical_path, - _rei->rsComm->clientUser.userName, - attribute, - value, - units); - } - } - } - else if("pep_api_data_obj_unlink_pre" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - const auto obj_inp = boost::any_cast(*it); - if (auto* p = getValByKey( &obj_inp->condInput, FORCE_FLAG_KW); p != 0) { rm_force_kw = p; } - indices_for_rm_coll = get_indices_for_delete_by_query (*_rei->rsComm, obj_inp->objPath, false); - } - else if("pep_api_data_obj_unlink_post" == _rn) { - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - const auto obj_inp = boost::any_cast(*it); - if ('*' != rm_force_kw[0]) { /* there was a force keyword */ - irods::indexing::indexer idx{_rei, config->instance_name_}; - nlohmann::json recurseInfo {{"is_collection",false}}; - recurseInfo["indices"] = indices_for_rm_coll; - idx.schedule_metadata_purge_for_recursive_rm_object (obj_inp->objPath, recurseInfo); - } - } - else if("pep_api_rm_coll_pre" == _rn) { - /** - * argument spec : - * [...?] - * - * before a collection is deleted. record whether FORCE_FLAG_KW is used. - * - **/ - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - CollInp*obj_inp = nullptr; - obj_inp = boost::any_cast(*it); - if (auto* p = getValByKey( &obj_inp->condInput, FORCE_FLAG_KW); p != 0) { rm_force_kw = p; } - indices_for_rm_coll = get_indices_for_delete_by_query( *_rei->rsComm, obj_inp->collName, true ); - } - else if("pep_api_rm_coll_post" == _rn) { - - /** - * argument spec : - * [...?] - * - * collection has been deleted successfully. If FORCE_FLAG_KW was used, then purge the - * from the relevant indexes collection recursively - */ - - if ('*' != rm_force_kw[0]) { /* there was a force keyword */ - auto it = _args.begin(); - std::advance(it, 2); - if(_args.end() == it) { - THROW( - SYS_INVALID_INPUT_PARAM, - "invalid number of arguments"); - } - CollInp* obj_inp = nullptr; - obj_inp = boost::any_cast(*it); - irods::indexing::indexer idx{_rei, config->instance_name_}; - nlohmann::json recurseInfo = {{"is_collection",true}}; - recurseInfo["indices"] = indices_for_rm_coll; - idx.schedule_metadata_purge_for_recursive_rm_object(obj_inp->collName, recurseInfo); - } - } - else if (_rn == "pep_api_atomic_apply_metadata_operations_pre" - || _rn == "pep_api_atomic_apply_metadata_operations_post") - { - using nlohmann::json; - auto pos = _rn.rfind("_p"); - auto when = _rn.substr(pos + 1); // "pre" or "post" - auto it = _args.begin(); - std::advance(it, 2); - - auto request = boost::any_cast(*it); - std::string requ_str {(const char*)request->buf,unsigned(request->len)}; - const auto & requ_json = json::parse( requ_str ); - - std::string obj_path = requ_json["entity_name"]; // logical path - std::string obj_type = requ_json["entity_type"]; // "data_object" or "collection" - if ("pre" == when) { - atomic_metadata_obj_path = obj_path; - } - else { - if (atomic_metadata_obj_path != obj_path) { - THROW( SYS_INVALID_INPUT_PARAM, fmt::format("invalid object path for {} operation",_rn)); - } - } - auto & map = atomic_metadata_tuples[ when ]; - - namespace fs = irods::experimental::filesystem; - std::string dobj_name {fs::path{obj_path}.object_name()}; - std::string dobj_parent {fs::path{obj_path}.parent_path()}; - - auto query_str = fmt::format( "SELECT META_{0}_ATTR_NAME, META_{0}_ATTR_VALUE, META_{0}_ATTR_UNITS where {0}_NAME = '{1}'", - (obj_type == "collection" ? "COLL" : "DATA"), - (obj_type == "collection" ? obj_path : dobj_name) ); - if (obj_type != "collection") { - query_str += fmt::format (" and COLL_NAME = '{}'", dobj_parent); - } - - irods::query qobj {_rei->rsComm, query_str}; - - for (const auto & row : qobj) { - map.insert( {row[0],row[1],row[2]} ); - } - if (when == "post") { - - std::vector avus_added_or_removed; - - const auto & pre_map = atomic_metadata_tuples[ "pre" ]; - set_symmetric_difference ( pre_map.begin(), pre_map.end(), - map.cbegin(), map.cend(), std::back_inserter(avus_added_or_removed)); - - for (const auto & [attribute, value, units]: avus_added_or_removed) { - if (attribute != config->index) { - irods::indexing::indexer idx{_rei, config->instance_name_}; - idx.schedule_metadata_indexing_event( - obj_path, - _rei->rsComm->clientUser.userName, - attribute, - value, - units); - break; // only need one event to re-index all AVU's - } - } - } - } - } - catch(const boost::bad_any_cast& _e) { - THROW( - INVALID_ANY_CAST, - boost::str(boost::format( - "function [%s] rule name [%s]") - % __FUNCTION__ % _rn)); - } - } // apply_indexing_policy - - // -=-=-=-= Invoke policy on object. uses - // - - // - apply_object_policy (root, obj_path, src_resc, indexer, index_name, index_type) - // - - // - (1) Composes a policy name from (root , indexer) - // - (2) Invokes the policy by that name upon (obj_path, src_resc, index_name, index_type) - // - - - void apply_object_policy( - ruleExecInfo_t* _rei, - const std::string& _policy_root, - const std::string& _object_path, - const std::string& _source_resource, - const std::string& _indexer, - const std::string& _index_name, - const std::string& _index_type) { - const std::string policy_name{irods::indexing::policy::compose_policy_name( - _policy_root, - _indexer)}; - - std::list args; - args.push_back(boost::any(_object_path)); - args.push_back(boost::any(_source_resource)); - args.push_back(boost::any(_index_name)); - args.push_back(boost::any(_index_type)); - irods::indexing::invoke_policy(_rei, policy_name, args); - - - } // apply_object_policy - - void apply_specific_policy( - ruleExecInfo_t* _rei, - const std::string& _policy_name, // request specific policy by name - const std::string& _object_path, - const std::string& _source_resource, - const std::string& _indexer, - const std::string& _index_name, - const std::string& _index_type) { - - std::list args; - args.push_back(boost::any(_object_path)); - args.push_back(boost::any(_source_resource)); - args.push_back(boost::any(_index_name)); - args.push_back(boost::any(_index_type)); - irods::indexing::invoke_policy(_rei, _policy_name, args); - - } // apply_specific_policy - -/***********/ - - std::string to_lowercase (const std::string & src) - { - std::string dst; - std::transform(src.begin(),src.end(), - std::back_inserter(dst), - [](wchar_t w) { return tolower(w); } ); - return dst; - } - - auto get_default_mime_type (const std::string & input) -> std::string - { - const static std::map default_mime_types - { - {".aac", "audio/aac"}, - {".abw", "application/x-abiword"}, - {".arc", "application/x-freearc"}, - {".avi", "video/x-msvideo"}, - {".azw", "application/vnd.amazon.ebook"}, - {".bin", "application/octet-stream"}, - {".bmp", "image/bmp"}, - {".bz", "application/x-bzip"}, - {".bz2", "application/x-bzip2"}, - {".cda", "application/x-cdf"}, - {".csh", "application/x-csh"}, - {".css", "text/css"}, - {".csv", "text/csv"}, - {".doc", "application/msword"}, - {".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"}, - {".eot", "application/vnd.ms-fontobject"}, - {".epub", "application/epub+zip"}, - {".gz", "application/gzip"}, - {".gif", "image/gif"}, - {".htm", "text/html"}, - {".html", "text/html"}, - {".ico", "image/vnd.microsoft.icon"}, - {".ics", "text/calendar"}, - {".jar", "application/java-archive"}, - {".jpeg", "image/jpeg"}, - {".jpg", "image/jpeg"}, - {".js", "text/javascript"}, - {".json", "application/json"}, - {".jsonld", "application/ld+json"}, - {".mid", "audio/midi audio/x-midi"}, - {".midi", "audio/midi audio/x-midi"}, - {".mjs", "text/javascript"}, - {".mp3", "audio/mpeg"}, - {".mp4", "video/mp4"}, - {".mpeg", "video/mpeg"}, - {".mpkg", "application/vnd.apple.installer+xml"}, - {".odp", "application/vnd.oasis.opendocument.presentation"}, - {".ods", "application/vnd.oasis.opendocument.spreadsheet"}, - {".odt", "application/vnd.oasis.opendocument.text"}, - {".oga", "audio/ogg"}, - {".ogv", "video/ogg"}, - {".ogx", "application/ogg"}, - {".opus", "audio/opus"}, - {".otf", "font/otf"}, - {".png", "image/png"}, - {".pdf", "application/pdf"}, - {".php", "application/x-httpd-php"}, - {".ppt", "application/vnd.ms-powerpoint"}, - {".pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"}, - {".rar", "application/vnd.rar"}, - {".rtf", "application/rtf"}, - {".sh", "application/x-sh"}, - {".svg", "image/svg+xml"}, - {".swf", "application/x-shockwave-flash"}, - {".tar", "application/x-tar"}, - {".tif", "image/tiff"}, - {".tiff", "image/tiff"}, - {".ts", "video/mp2t"}, - {".ttf", "font/ttf"}, - {".txt", "text/plain"}, - {".vsd", "application/vnd.visio"}, - {".wav", "audio/wav"}, - {".weba", "audio/webm"}, - {".webm", "video/webm"}, - {".webp", "image/webp"}, - {".woff", "font/woff"}, - {".woff2", "font/woff2"}, - {".xhtml", "application/xhtml+xml"}, - {".xls", "application/vnd.ms-excel"}, - {".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}, - {".xml", "application/xml"}, // if not readable from casual users (RFC 3023, section 3)" else "text/xml" - {".xul", "application/vnd.mozilla.xul+xml"}, - {".zip", "application/zip"}, - {".3gp", "video/3gpp"}, // audio/3gpp if it doesn't contain video, - {".3g2", "video/3gpp2"}, // audio/3gpp2 if it doesn't contain video, - {".7z", "application/x-7z-compressed"}, - }; - std::string retvalue {}; - if (auto offs = input.rfind("."); offs != std::string::npos) { - const std::string lower_case_ext = to_lowercase(input.substr(offs)); - try{ - retvalue = default_mime_types.at(lower_case_ext); - } - catch(const std::out_of_range & e) { - irods::log(LOG_DEBUG, fmt::format("In {}, function {}: Unknown extension [{}]",__FILE__,__func__,lower_case_ext)); - } - } - return retvalue.size() ? retvalue : "application/octet-stream"; - } - - auto get_system_metadata( ruleExecInfo_t* _rei - ,const std::string& _obj_path ) -> nlohmann::json { - using nlohmann::json; - const boost::filesystem::path p{_obj_path}; - const std::string parent_name = p.parent_path().string(); - const std::string name = p.filename().string(); - namespace fs = irods::experimental::filesystem; - namespace fsvr = irods::experimental::filesystem::server; - auto irods_path = fs::path{_obj_path}; - const auto s = fsvr::status(*_rei->rsComm, irods_path); - std::string query_str; - - json obj; - - obj["absolutePath"] = _obj_path; - - bool is_collection = false; - if (fsvr::is_data_object(s)) { - query_str = fmt::format("SELECT DATA_ID , DATA_MODIFY_TIME, DATA_ZONE_NAME, COLL_NAME, DATA_SIZE where DATA_NAME = '{0}'" - " and COLL_NAME = '{1}' ", name, parent_name ); - irods::query qobj{_rei->rsComm, query_str, 1}; - for (const auto & i:qobj) { - obj["lastModifiedDate"] = std::stol( i[1] ); // epoch seconds - obj["zoneName"] = i[2]; - obj["parentPath"] = i[3]; - obj["dataSize"] = std::stol( i[4] ); - obj["isFile"] = true; - break; - } - } - else if (fsvr::is_collection(s)) { - is_collection = true; - query_str = fmt::format("SELECT COLL_ID , COLL_MODIFY_TIME, COLL_ZONE_NAME, COLL_PARENT_NAME where COLL_NAME = '{0}'" - " and COLL_PARENT_NAME = '{1}' ", _obj_path, parent_name ); - irods::query qobj{_rei->rsComm, query_str, 1}; - for (const auto & i : qobj) { - obj["lastModifiedDate"] = std::stol( i[1] ); // epoch seconds - obj["zoneName"] = i[2]; - obj["parentPath"] = i[3]; - obj["dataSize"] = 0L; - obj["isFile"] = false; - break; - } - } - auto fileName = obj ["fileName"] = irods_path.object_name(); - obj ["url"] = fmt::format(fmt::runtime(config->urlTemplate), _obj_path); - obj["mimeType"] = (is_collection ? "" : get_default_mime_type (fileName)); - return obj; - - } // get_system_metadata - - void apply_metadata_policy( - ruleExecInfo_t* _rei, - const std::string& _policy_root, - const std::string& _object_path, - const std::string& _indexer, - const std::string& _index_name - // -- now used only to distinguish between object and avu purges - ,const std::string& _attribute - ,const std::string& _value - ,const std::string& _units - ) - { - const std::string policy_name { irods::indexing::policy::compose_policy_name( - _policy_root, - _indexer) }; - - - std::list args; - args.push_back(boost::any(_object_path)); - args.push_back(boost::any(_attribute)); // was attr // -- not explicit anymore with new schema as AVU's are cataloged - args.push_back(boost::any(_value)); // was value // within the indexed entry for the object itself - args.push_back(boost::any(_units)); // was units // As this is now a no-op, we should remove these arguments. - args.push_back(boost::any(_index_name)); - - args.push_back(boost::any(get_system_metadata(_rei, _object_path).dump())); - - irods::indexing::invoke_policy(_rei, policy_name, args); - - } // apply_metadata_policy + re->add_operation( + "rule_exists", std::function(rule_exists)); + re->add_operation&>( + "list_rules", std::function&)>(list_rules)); -} // namespace + re->add_operation&, irods::callback>( + "exec_rule", + std::function&, irods::callback)>(exec_rule)); + re->add_operation( + "exec_rule_text", + std::function( + exec_rule_text)); -irods::error start( - irods::default_re_ctx&, - const std::string& _instance_name ) { - RuleExistsHelper::Instance()->registerRuleRegex("pep_api_.*"); - config = std::make_unique(_instance_name); - rodsLog(LOG_DEBUG, "value of minimum_delay_time: %d", config->minimum_delay_time); - rodsLog(LOG_DEBUG, "value of maximum_delay_time: %d", config->maximum_delay_time); - rodsLog(LOG_DEBUG, "value of job_limit_per_collection_indexing_operation: %d", config->job_limit); - return SUCCESS(); -} // start - -irods::error stop( - irods::default_re_ctx&, - const std::string& ) { - return SUCCESS(); -} // stop - -irods::error rule_exists( - irods::default_re_ctx&, - const std::string& _rn, - bool& _ret) { - const std::set rules{ - "pep_api_atomic_apply_metadata_operations_pre", - "pep_api_atomic_apply_metadata_operations_post", - "pep_api_data_obj_open_post", - "pep_api_data_obj_create_post", - "pep_api_data_obj_repl_post", - "pep_api_data_obj_unlink_pre", - "pep_api_data_obj_unlink_post", - "pep_api_mod_avu_metadata_pre", - "pep_api_mod_avu_metadata_post", - "pep_api_data_obj_close_post", - "pep_api_data_obj_put_post", - "pep_api_phy_path_reg_post", - "pep_api_rm_coll_pre", - "pep_api_rm_coll_post", - }; - _ret = rules.find(_rn) != rules.end(); - - return SUCCESS(); -} // rule_exists - -irods::error list_rules(irods::default_re_ctx&, std::vector&) { - return SUCCESS(); -} // list_rules - -irods::error exec_rule( - irods::default_re_ctx&, - const std::string& _rn, - std::list& _args, - irods::callback _eff_hdlr) { - ruleExecInfo_t* rei{}; - const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); - if(!err.ok()) { - return err; - } - try { - apply_indexing_policy(_rn, rei, _args); - } - catch(const std::invalid_argument& _e) { - irods::indexing::exception_to_rerror( - SYS_NOT_SUPPORTED, - _e.what(), - rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const std::domain_error& _e) { - irods::indexing::exception_to_rerror( - INVALID_ANY_CAST, - _e.what(), - rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const irods::exception& _e) { - irods::indexing::exception_to_rerror( - _e, - rei->rsComm->rError); - return irods::error(_e); - } - - return CODE(RULE_ENGINE_CONTINUE); - -} // exec_rule - -irods::error exec_rule_text( - irods::default_re_ctx&, - const std::string& _rule_text, - msParamArray_t* _ms_params, - const std::string& _out_desc, - irods::callback _eff_hdlr) { - using json = nlohmann::json; - - try { - // skip the first line: @external - std::string rule_text{_rule_text}; - if(_rule_text.find("@external") != std::string::npos) { - rule_text = _rule_text.substr(10); - } - const auto rule_obj = json::parse(rule_text); - const std::string& rule_engine_instance_name = rule_obj["rule-engine-instance-name"]; - // if the rule text does not have our instance name, fail - if(config->instance_name_ != rule_engine_instance_name) { - return ERROR( - SYS_NOT_SUPPORTED, - "instance name not found"); - } -#if 0 - // catalog / index drift correction - if(irods::indexing::schedule::indexing == - rule_obj["rule-engine-operation"]) { - ruleExecInfo_t* rei{}; - const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); - if(!err.ok()) { - return err; - } - - const std::string& params = rule_obj["delay-parameters"]; - - json delay_obj; - delay_obj["rule-engine-operation"] = irods::indexing::policy::indexing; - - irods::indexing::indexer idx{rei, config->instance_name_}; - idx.schedule_indexing_policy( - delay_obj.dump(), - params); - } - else -#endif - { - return ERROR( - SYS_NOT_SUPPORTED, - "supported rule name not found"); - } - } - catch(const std::invalid_argument& _e) { - std::string msg{"Rule text is not valid JSON -- "}; - msg += _e.what(); - return ERROR( - SYS_NOT_SUPPORTED, - msg); - } - catch(const std::domain_error& _e) { - std::string msg{"Rule text is not valid JSON -- "}; - msg += _e.what(); - return ERROR( - SYS_NOT_SUPPORTED, - msg); - } - catch(const irods::exception& _e) { - return ERROR( - _e.code(), - _e.what()); - } - - return SUCCESS(); -} // exec_rule_text - -irods::error exec_rule_expression( - irods::default_re_ctx&, - const std::string& _rule_text, - msParamArray_t* _ms_params, - irods::callback _eff_hdlr) { - using json = nlohmann::json; - ruleExecInfo_t* rei{}; - const auto err = _eff_hdlr("unsafe_ms_ctx", &rei); - if(!err.ok()) { - return err; - } - - try { - const auto rule_obj = json::parse(_rule_text); - if(irods::indexing::policy::object::index == - rule_obj["rule-engine-operation"]) { - try { - // proxy for provided user name - const std::string& user_name = rule_obj["user-name"]; - rstrcpy( - rei->rsComm->clientUser.userName, - user_name.c_str(), - NAME_LEN); - - // - implement (full-text?) indexing on an individual object - // - as a delayed task. - // - - apply_object_policy( - rei, - irods::indexing::policy::object::index, - rule_obj["object-path"], - rule_obj["source-resource"], - rule_obj["indexer"], - rule_obj["index-name"], - rule_obj["index-type"]); - } - catch(const irods::exception& _e) { - printErrorStack(&rei->rsComm->rError); - return ERROR( - _e.code(), - _e.what()); - } - } - else if(irods::indexing::policy::object::purge == - rule_obj["rule-engine-operation"]) { - try { - // proxy for provided user name - const std::string& user_name = rule_obj["user-name"]; - rstrcpy( - rei->rsComm->clientUser.userName, - user_name.c_str(), - NAME_LEN); - - // - implement index purge on an individual object - // - as a delayed task. - // - - apply_object_policy( - rei, - irods::indexing::policy::object::purge, - rule_obj["object-path"], - rule_obj["source-resource"], - rule_obj["indexer"], - rule_obj["index-name"], - rule_obj["index-type"]); - } - catch(const irods::exception& _e) { - printErrorStack(&rei->rsComm->rError); - return ERROR( - _e.code(), - _e.what()); - } - } - else if(irods::indexing::policy::collection::index == - rule_obj["rule-engine-operation"]) { - - // - launch delayed task to handle indexing events under a collection - // - ( example : a new indexing AVU was placed on the collection ) - // - - irods::indexing::indexer idx{rei, config->instance_name_}; - idx.schedule_policy_events_for_collection( - irods::indexing::operation_type::index, - rule_obj["collection-name"], - rule_obj["user-name"], - rule_obj["indexer"], - rule_obj["index-name"], - rule_obj["index-type"]); - } - else if(irods::indexing::policy::collection::purge == - rule_obj["rule-engine-operation"]) { - - // - launch delayed task to handle indexing events under a collection - // - ( example : an indexing AVU was removed from the collection ) - // - - irods::indexing::indexer idx{rei, config->instance_name_}; - idx.schedule_policy_events_for_collection( - irods::indexing::operation_type::purge, - rule_obj["collection-name"], - rule_obj["user-name"], - rule_obj["indexer"], - rule_obj["index-name"], - rule_obj["index-type"]); - } - else if(irods::indexing::policy::metadata::index == - rule_obj["rule-engine-operation"]) { - try { - // proxy for provided user name - const std::string& user_name = rule_obj["user-name"]; - rstrcpy( - rei->rsComm->clientUser.userName, - user_name.c_str(), - NAME_LEN); - - apply_metadata_policy( - rei, - irods::indexing::policy::metadata::index, - rule_obj["object-path"], - rule_obj["indexer"], - rule_obj["index-name"] - , rule_obj["attribute"] - , rule_obj["value"] - , rule_obj["units"] - ); - } - catch(const irods::exception& _e) { - printErrorStack(&rei->rsComm->rError); - return ERROR( - _e.code(), - _e.what()); - } - } - else if(irods::indexing::policy::metadata::purge == - rule_obj["rule-engine-operation"]) { - try { - // proxy for provided user name - const std::string& user_name = rule_obj["user-name"]; - rstrcpy( - rei->rsComm->clientUser.userName, - user_name.c_str(), - NAME_LEN); - - apply_metadata_policy( - rei, - irods::indexing::policy::metadata::purge, - rule_obj["object-path"], - rule_obj["indexer"], - rule_obj["index-name"] - , rule_obj["attribute"] - , rule_obj["value"] - , rule_obj["units"] - ); - } - catch(const irods::exception& _e) { - printErrorStack(&rei->rsComm->rError); - return ERROR( - _e.code(), - _e.what()); - } - } - else if("irods_policy_recursive_rm_object_by_path" == rule_obj["rule-engine-operation"]) { - - const std::string& user_name = rule_obj["user-name"]; - rstrcpy( - rei->rsComm->clientUser.userName, - user_name.c_str(), - NAME_LEN); - - apply_specific_policy( - rei, - "irods_policy_recursive_rm_object_by_path", - rule_obj["object-path"], - rule_obj["source-resource"], - rule_obj["indexer"], - rule_obj["index-name"], - rule_obj["index-type"]); - } - else { - printErrorStack(&rei->rsComm->rError); - return ERROR( - SYS_NOT_SUPPORTED, - "supported rule name not found"); - } - } - catch(const json::parse_error& _e) { - rodsLog(LOG_ERROR,"Exception (%s). Could not parse JSON rule text @ FILE %s LINE %d FUNCTION %s ", - _e.what(),__FILE__,__LINE__,__FUNCTION__); - return CODE( RULE_ENGINE_CONTINUE); - } - catch(const std::invalid_argument& _e) { - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const std::domain_error& _e) { - return ERROR( - SYS_NOT_SUPPORTED, - _e.what()); - } - catch(const irods::exception& _e) { - return ERROR( - _e.code(), - _e.what()); - } - - return SUCCESS(); - -} // exec_rule_expression - -extern "C" -irods::pluggable_rule_engine* plugin_factory( - const std::string& _inst_name, - const std::string& _context ) { - irods::pluggable_rule_engine* re = - new irods::pluggable_rule_engine( - _inst_name, - _context); - - re->add_operation< - irods::default_re_ctx&, - const std::string&>( - "start", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&)>(start)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&>( - "stop", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&)>(stop)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - bool&>( - "rule_exists", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - bool&)>(rule_exists)); - - re->add_operation< - irods::default_re_ctx&, - std::vector&>( - "list_rules", - std::function< - irods::error( - irods::default_re_ctx&, - std::vector&)>(list_rules)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - std::list&, - irods::callback>( - "exec_rule", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - std::list&, - irods::callback)>(exec_rule)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback>( - "exec_rule_text", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - const std::string&, - irods::callback)>(exec_rule_text)); - - re->add_operation< - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback>( - "exec_rule_expression", - std::function< - irods::error( - irods::default_re_ctx&, - const std::string&, - msParamArray_t*, - irods::callback)>(exec_rule_expression)); - return re; + re->add_operation( + "exec_rule_expression", + std::function( + exec_rule_expression)); + return re; } // plugin_factory - diff --git a/packaging/test_plugin_indexing.py b/packaging/test_plugin_indexing.py index 1b432bf..b4efd49 100644 --- a/packaging/test_plugin_indexing.py +++ b/packaging/test_plugin_indexing.py @@ -166,9 +166,7 @@ class BooksUnknownError(RuntimeError): pass return retvalue -ES_VERSION = '7.x' -def es7_exactly (): return '8.' > ES_VERSION >= '7.' -def es7_or_later(): return ES_VERSION >= '7.' +ES_VERSION = '7.x' # TODO We probably don't need this anymore. @contextlib.contextmanager def indexing_plugin__installed(indexing_config=(), server_env_options={}): @@ -195,12 +193,6 @@ def indexing_plugin__installed(indexing_config=(), server_env_options={}): "read_size" : 4194304, "es_version" : ES_VERSION } - }, - { - "instance_name": "irods_rule_engine_plugin-document_type-instance", - "plugin_name": "irods_rule_engine_plugin-document_type", - "plugin_specific_configuration": { - } } ] irods_config.commit(irods_config.server_config, irods_config.server_config_path) @@ -245,23 +237,21 @@ def install_python3_virtualenv_with_python_irodsclient(PATH='~/py3',preTestPRCIn # Assuming use for metadata style of index only def search_index_for_avu_attribute_name(index_name, attr_name, port = ELASTICSEARCH_PORT): - maptype = "" if es7_or_later() else "/text" - track_num_hits_as_int = "&track_total_hits=true&rest_total_hits_as_int=true" if es7_exactly() else "" out,_,rc = lib.execute_command_permissive( dedent("""\ - curl -X GET -H'Content-Type: application/json' HTTP://localhost:{port}/{index_name}{maptype}/_search?pretty=true{track_num_hits_as_int} -d ' + curl -X GET -H'Content-Type: application/json' http://localhost:{port}/{index_name}/_search?track_total_hits=true&rest_total_hits_as_int=true -d ' {{ "from": 0, "size" : 500, "_source" : ["absolutePath", "metadataEntries"], "query" : {{ "nested": {{ - "path": "metadataEntries", - "query": {{ - "bool": {{ - "must": [ - {{ "match": {{ "metadataEntries.attribute": "{attr_name}" }} }} - ] + "path": "metadataEntries", + "query": {{ + "bool": {{ + "must": [ + {{ "match": {{ "metadataEntries.attribute": "{attr_name}" }} }} + ] + }} }} - }} }} }} }}' """).format(**locals())) @@ -273,9 +263,8 @@ def search_index_for_avu_attribute_name(index_name, attr_name, port = ELASTICSEA def search_index_for_object_path(index_name, path_component, extra_source_fields="", port = ELASTICSEARCH_PORT): path_component_matcher = ("*/" + path_component + ("/*" if not path_component.endswith("$") else "")).rstrip("$") - track_num_hits_as_int = "&track_total_hits=true&rest_total_hits_as_int=true" if es7_exactly() else "" out,_,rc = lib.execute_command_permissive( dedent("""\ - curl -X GET -H'Content-Type: application/json' HTTP://localhost:{port}/{index_name}/text/_search?pretty=true{track_num_hits_as_int} -d ' + curl -X GET -H'Content-Type: application/json' http://localhost:{port}/{index_name}/_search?track_total_hits=true&rest_total_hits_as_int=true -d ' {{ "from": 0, "size" : 500, "_source" : ["absolutePath" {extra_source_fields} ], @@ -294,9 +283,8 @@ def search_index_for_object_path(index_name, path_component, extra_source_fields # Assuming use for fulltext style of index only def search_index_for_All_object_paths(index_name, port = ELASTICSEARCH_PORT): - track_num_hits_as_int = "&track_total_hits=true&rest_total_hits_as_int=true" if es7_exactly() else "" out,_,rc = lib.execute_command_permissive( dedent("""\ - curl -X GET -H'Content-Type: application/json' HTTP://localhost:{port}/{index_name}/text/_search?pretty=true{track_num_hits_as_int} -d ' + curl -X GET -H'Content-Type: application/json' http://localhost:{port}/{index_name}/_search?track_total_hits=true&rest_total_hits_as_int=true -d ' {{ "from": 0, "size" : 500, "_source" : ["absolutePath", "data"], @@ -308,60 +296,45 @@ def search_index_for_All_object_paths(index_name, port = ELASTICSEARCH_PORT): return out def create_fulltext_index(index_name = DEFAULT_FULLTEXT_INDEX, port = ELASTICSEARCH_PORT): - OPTION = "?include_type_name" if es7_or_later() else "" #--> ES7 allows 'text' mapping but requires hint - lib.execute_command("curl -X PUT -H'Content-Type: application/json' http://localhost:{port}/{index_name}".format(**locals())) - lib.execute_command("curl -X PUT -H'Content-Type: application/json' http://localhost:{port}/{index_name}/_mapping/text{OPTION} ".format(**locals()) + - """ -d '{ "properties" : { "absolutePath" : { "type" : "keyword" }, "data" : { "type" : "text" } } }'""") + mapping = json.dumps({ + "mappings": { + "properties": { + "absolutePath": {"type": "keyword"}, + "data": {"type": "text"} + } + } + }) + lib.execute_command("curl -X PUT -H'Content-Type: application/json' http://localhost:{port}/{index_name} -d'{mapping}'".format(**locals())) return index_name def create_metadata_index(index_name = DEFAULT_METADATA_INDEX, port = ELASTICSEARCH_PORT): - OPTION = "" if es7_or_later() else "/text" #--> switch away from 'text' mapping if using >= ES7 - lib.execute_command("curl -X PUT -H'Content-Type: application/json' http://localhost:{port}/{index_name}".format(**locals())) - lib.execute_command("curl -X PUT -H'Content-Type: application/json' http://localhost:{port}/{index_name}/_mapping{OPTION} ".format(**locals()) + - """ -d '{ "properties" : { - "url": { - "type": "text" - }, - "zoneName": { - "type": "keyword" - }, - "absolutePath": { - "type": "keyword" - }, - "fileName": { - "type": "text" - }, - "parentPath": { - "type": "text" - }, - "isFile": { - "type": "boolean" - }, - "dataSize": { - "type": "long" - }, - "mimeType": { - "type": "keyword" - }, - "lastModifiedDate": { - "type": "date", - "format": "epoch_second" - }, - "metadataEntries": { - "type": "nested", - "properties": { - "attribute": { - "type": "keyword" - }, - "value": { - "type": "text" - }, - "unit": { - "type": "keyword" - } - } - } - }}' """) + mapping = json.dumps({ + "mappings": { + "properties": { + "url": {"type": "text"}, + "zoneName": {"type": "keyword"}, + "absolutePath": {"type": "keyword"}, + "fileName": {"type": "text"}, + "parentPath": {"type": "text"}, + "isFile": {"type": "boolean"}, + "dataSize": {"type": "long"}, + "mimeType": {"type": "keyword"}, + "lastModifiedDate": { + "type": "date", + "format": "epoch_second" + }, + "metadataEntries": { + "type": "nested", + "properties": { + "attribute": {"type": "keyword"}, + "value": {"type": "text"}, + "unit": {"type": "keyword"} + } + } + } + } + }) + lib.execute_command("curl -X PUT -H'Content-Type: application/json' http://localhost:{port}/{index_name} -d'{mapping}'".format(**locals())) return index_name def delete_fulltext_index(index_name = DEFAULT_FULLTEXT_INDEX, port = ELASTICSEARCH_PORT): diff --git a/plugin_specific_configuration.cpp b/plugin_specific_configuration.cpp index c5f1f88..4f593e4 100644 --- a/plugin_specific_configuration.cpp +++ b/plugin_specific_configuration.cpp @@ -1,53 +1,50 @@ - #include "plugin_specific_configuration.hpp" -#include + #include -#include +#include #include -#include - - -namespace irods { - namespace indexing { - plugin_specific_configuration get_plugin_specific_configuration( - const std::string& _instance_name ) { - try { - const auto& rule_engines = get_server_property< - const nlohmann::json&>( - std::vector{ - KW_CFG_PLUGIN_CONFIGURATION, - KW_CFG_PLUGIN_TYPE_RULE_ENGINE}); - for ( const auto& rule_engine : rule_engines ) { - const auto& inst_name = rule_engine.at( KW_CFG_INSTANCE_NAME).get_ref(); - if ( inst_name == _instance_name ) { - if(rule_engine.count(KW_CFG_PLUGIN_SPECIFIC_CONFIGURATION) > 0) { - return rule_engine.at(KW_CFG_PLUGIN_SPECIFIC_CONFIGURATION); - } // if has PSC - } // if inst_name - } // for rule_engines - - } catch ( const std::out_of_range& e ) { - THROW( KEY_NOT_FOUND, e.what() ); - - } catch ( const nlohmann::json::exception& _e ) { - irods::log(LOG_ERROR, fmt::format("[{}:{}] in [file={}] - json exception occurred [error={}], [instance={}]", - __func__,__LINE__,__FILE__, _e.what(), _instance_name)); - THROW( SYS_LIBRARY_ERROR, _e.what() ); - - } catch ( const std::exception& e ) { - rodsLog(LOG_ERROR, "General exception in %s - %s", __func__, e.what()); - THROW(SYS_INTERNAL_ERR, e.what()); - - } catch ( ... ) { - THROW( SYS_UNKNOWN_ERROR, fmt::format( "Function {} File {} Line {}",__func__,__FILE__,__LINE__)); - } - - THROW( - SYS_INVALID_INPUT_PARAM, - boost::format("failed to find configuration for indexing plugin [%s]") % - _instance_name); - } // get_plugin_specific_configuration +#include - } // namespace indexing -} // namespace irods +namespace irods::indexing +{ + plugin_specific_configuration get_plugin_specific_configuration(const std::string& _instance_name) + { + try { + const auto& rule_engines = get_server_property( + std::vector{KW_CFG_PLUGIN_CONFIGURATION, KW_CFG_PLUGIN_TYPE_RULE_ENGINE}); + for (const auto& rule_engine : rule_engines) { + const auto& inst_name = rule_engine.at(KW_CFG_INSTANCE_NAME).get_ref(); + if (inst_name == _instance_name) { + if (rule_engine.count(KW_CFG_PLUGIN_SPECIFIC_CONFIGURATION) > 0) { + return rule_engine.at(KW_CFG_PLUGIN_SPECIFIC_CONFIGURATION); + } + } + } + } + catch (const std::out_of_range& e) { + THROW(KEY_NOT_FOUND, e.what()); + } + catch (const nlohmann::json::exception& _e) { + rodsLog(LOG_ERROR, + fmt::format("[{}:{}] in [file={}] - json exception occurred [error={}], [instance={}]", + __func__, + __LINE__, + __FILE__, + _e.what(), + _instance_name) + .c_str()); + THROW(SYS_LIBRARY_ERROR, _e.what()); + } + catch (const std::exception& e) { + rodsLog(LOG_ERROR, "General exception in %s - %s", __func__, e.what()); + THROW(SYS_INTERNAL_ERR, e.what()); + } + catch (...) { + THROW(SYS_UNKNOWN_ERROR, fmt::format("Function {} File {} Line {}", __func__, __FILE__, __LINE__)); + } + + THROW(SYS_INVALID_INPUT_PARAM, + fmt::format("failed to find configuration for indexing plugin [{}]", _instance_name)); + } // get_plugin_specific_configuration +} // namespace irods::indexing diff --git a/plugin_specific_configuration.hpp b/plugin_specific_configuration.hpp index b6a2994..51dcebb 100644 --- a/plugin_specific_configuration.hpp +++ b/plugin_specific_configuration.hpp @@ -1,14 +1,18 @@ -#ifndef PLUGIN_SPECIFIC_CONFIGURATION_HPP -#define PLUGIN_SPECIFIC_CONFIGURATION_HPP -#include +#ifndef IRODS_CAPABILITY_INDEXING_PLUGIN_SPECIFIC_CONFIGURATION_HPP +#define IRODS_CAPABILITY_INDEXING_PLUGIN_SPECIFIC_CONFIGURATION_HPP + #include #include + #include -namespace irods { - namespace indexing { - using plugin_specific_configuration = nlohmann::json; - plugin_specific_configuration get_plugin_specific_configuration(const std::string& _instance_name); - } // namespace indexing -} // namespace irods -#endif // PLUGIN_SPECIFIC_CONFIGURATION_HPP +#include + +namespace irods::indexing +{ + using plugin_specific_configuration = nlohmann::json; + + plugin_specific_configuration get_plugin_specific_configuration(const std::string& _instance_name); +} // namespace irods::indexing + +#endif // IRODS_CAPABILITY_INDEXING_PLUGIN_SPECIFIC_CONFIGURATION_HPP diff --git a/utilities.cpp b/utilities.cpp index b00ca81..5170aed 100644 --- a/utilities.cpp +++ b/utilities.cpp @@ -1,76 +1,57 @@ - #include "utilities.hpp" -#include - -namespace irods { - namespace indexing { - - // -=-=-= exception and error reporting - - void exception_to_rerror( - const irods::exception& _exception, - rError_t& _error) { - - std::string msg; - for(const auto& i : _exception.message_stack()) { - msg += i; - } - - addRErrorMsg( - &_error, - _exception.code(), - msg.c_str()); - } // exception_to_rerror - - void exception_to_rerror( - const int _code, - const char* _what, - rError_t& _error) { - addRErrorMsg( - &_error, - _code, - _what); - } // exception_to_rerror - static std::string collapse_error_stack( - rError_t& _error) { - std::stringstream ss; - for(int i = 0; i < _error.len; ++i) { - rErrMsg_t* err_msg = _error.errMsg[i]; - if(err_msg->status != STDOUT_STATUS) { - ss << "status: " << err_msg->status << " "; - } - - ss << err_msg->msg << " - "; - } - return ss.str(); - } // collapse_error_stack - - - // -=-=-=-= invoke policy(action) by name - - void invoke_policy( - ruleExecInfo_t* _rei, - const std::string& _action, - std::list _args) { - irods::rule_engine_context_manager< - irods::unit, - ruleExecInfo_t*, - irods::AUDIT_RULE> re_ctx_mgr( - irods::re_plugin_globals->global_re_mgr, - _rei); - irods::error err = re_ctx_mgr.exec_rule(_action, irods::unpack(_args)); - - if(!err.ok()) { - if(_rei->status < 0) { - std::string msg = collapse_error_stack(_rei->rsComm->rError); - THROW(_rei->status, msg); - } - - THROW(err.code(), err.result()); - } +#include - } // invoke_policy - - } // namespace indexing -} // namespace irods +namespace irods::indexing +{ + // -=-=-= exception and error reporting + + void exception_to_rerror(const irods::exception& _exception, rError_t& _error) + { + std::string msg; + for (const auto& i : _exception.message_stack()) { + msg += i; + } + + addRErrorMsg(&_error, _exception.code(), msg.c_str()); + } // exception_to_rerror + + void exception_to_rerror(const int _code, const char* _what, rError_t& _error) + { + addRErrorMsg(&_error, _code, _what); + } // exception_to_rerror + + static std::string collapse_error_stack(rError_t& _error) + { + std::stringstream ss; + + for (int i = 0; i < _error.len; ++i) { + rErrMsg_t* err_msg = _error.errMsg[i]; + if (err_msg->status != STDOUT_STATUS) { + ss << "status: " << err_msg->status << " "; + } + + ss << err_msg->msg << " - "; + } + + return ss.str(); + } // collapse_error_stack + + // -=-=-=-= invoke policy(action) by name + + void invoke_policy(ruleExecInfo_t* _rei, const std::string& _action, std::list _args) + { + irods::rule_engine_context_manager re_ctx_mgr( + irods::re_plugin_globals->global_re_mgr, _rei); + irods::error err = re_ctx_mgr.exec_rule(_action, irods::unpack(_args)); + + if (!err.ok()) { + if (_rei->status < 0) { + std::string msg = collapse_error_stack(_rei->rsComm->rError); + THROW(_rei->status, msg); + } + + THROW(err.code(), err.result()); + } + } // invoke_policy +} // namespace irods::indexing diff --git a/utilities.hpp b/utilities.hpp index 052ded7..bb6df9b 100644 --- a/utilities.hpp +++ b/utilities.hpp @@ -1,44 +1,38 @@ -#ifndef UTILITIES_HPP -#define UTILITIES_HPP +#ifndef IRODS_CAPABILITY_INDEXING_UTILITIES_HPP +#define IRODS_CAPABILITY_INDEXING_UTILITIES_HPP #include #include #include -namespace irods { - namespace indexing { - - const std::string indexer_separator{"::"}; - - inline - auto parse_indexer_string( const std::string& _indexer_string) -> std::tuple - { - const auto pos = _indexer_string.find_last_of(indexer_separator); - if(std::string::npos == pos) { - THROW( - SYS_INVALID_INPUT_PARAM, - boost::format("[%s] does not include an index separator for collection") - % _indexer_string); - } - const auto index_name = _indexer_string.substr(0, pos-(indexer_separator.size()-1)); - const auto index_type = _indexer_string.substr(pos+1); - return std::make_tuple(index_name, index_type); - } - - void exception_to_rerror( - const irods::exception& _exception, - rError_t& _error); - - void exception_to_rerror( - const int _code, - const char* _what, - rError_t& _error); - - void invoke_policy( - ruleExecInfo_t* _rei, - const std::string& _action, - std::list _args); - } // namespace indexing -} // namespace irods - -#endif // UTILITIES_HPP +#include +#include + +#include +#include +#include + +namespace irods::indexing +{ + const std::string indexer_separator{"::"}; + + inline auto parse_indexer_string(const std::string& _indexer_string) -> std::tuple + { + const auto pos = _indexer_string.find_last_of(indexer_separator); + if (std::string::npos == pos) { + THROW(SYS_INVALID_INPUT_PARAM, + fmt::format("[{}] does not include an index separator for collection", _indexer_string)); + } + const auto index_name = _indexer_string.substr(0, pos - (indexer_separator.size() - 1)); + const auto index_type = _indexer_string.substr(pos + 1); + return std::make_tuple(index_name, index_type); + } + + void exception_to_rerror(const irods::exception& _exception, rError_t& _error); + + void exception_to_rerror(const int _code, const char* _what, rError_t& _error); + + void invoke_policy(ruleExecInfo_t* _rei, const std::string& _action, std::list _args); +} // namespace irods::indexing + +#endif // IRODS_CAPABILITY_INDEXING_UTILITIES_HPP