Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#128] Update for iRODS 4.3.1 (main) #131

Merged
merged 10 commits into from
Feb 22, 2024
Merged
4 changes: 0 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ set(CPACK_ARCHIVE_COMPONENT_INSTALL ON)

include(${CMAKE_SOURCE_DIR}/indexing.cmake)
include(${CMAKE_SOURCE_DIR}/elasticsearch.cmake)
include(${CMAKE_SOURCE_DIR}/document_type.cmake)

list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${CMAKE_INSTALL_LIBDIR}/irods")
list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_PLUGINS_DIRECTORY}")
Expand All @@ -111,7 +110,4 @@ list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INS
list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts/irods")
list(APPEND CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX}${IRODS_HOME_DIRECTORY}/scripts/irods/test")

set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_SOURCE_DIR}/packaging/postinst;")
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_SOURCE_DIR}/packaging/postinst")

include(CPack)
47 changes: 17 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,31 +27,24 @@ By default, should no resource be tagged it is assumed that all resources are av
There are currently three rule engine plugins to configure for the indexing capability which should be added to the `"rule_engines"` section of `/etc/irods/server_config.json`:

```
"rule_engines": [
{
"instance_name": "irods_rule_engine_plugin-indexing-instance",
"plugin_name": "irods_rule_engine_plugin-indexing",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-elasticsearch-instance",
"plugin_name": "irods_rule_engine_plugin-elasticsearch",
"plugin_specific_configuration": {
"hosts" : ["http://localhost:9200/"],
"bulk_count" : 100,
"read_size" : 4194304
}
},
{
"instance_name": "irods_rule_engine_plugin-document_type-instance",
"plugin_name": "irods_rule_engine_plugin-document_type",
"plugin_specific_configuration": {
}
},
]
"rule_engines": [
{
"instance_name": "irods_rule_engine_plugin-indexing-instance",
"plugin_name": "irods_rule_engine_plugin-indexing",
"plugin_specific_configuration": {}
},
{
"instance_name": "irods_rule_engine_plugin-elasticsearch-instance",
"plugin_name": "irods_rule_engine_plugin-elasticsearch",
"plugin_specific_configuration": {
"hosts": ["http://localhost:9200/"],
"bulk_count": 100,
"read_size": 4194304
}
}
]
```
The first is the main indexing rule engine plugin, the second is the plugin responsible for implementing the policy for the indexing technology, and the third is responsible for implementing the document type introspection. Currently the default simply returns `text` as the document type. This policy can be overridden to call out to services like Tika for a better introspection of the data.
The first is the main indexing rule engine plugin and the second is the plugin responsible for implementing the policy for the indexing technology.

Within each plugin configuration stanza, the "plugin_specific_configuration" object may contain a number of key-value pairs. The following pairs are currently applicable for the purpose of setting the indexing capability's operating parameters:

Expand Down Expand Up @@ -79,12 +72,6 @@ irods_policy_indexing_metadata_index_<technology>
irods_policy_indexing_metadata_purge_<technology>
```

### Document Type Policy

```
irods_policy_indexing_document_type_<technology>
```

### Plugin Testing

Caveats:
Expand Down
163 changes: 88 additions & 75 deletions configuration.cpp
Original file line number Diff line number Diff line change
@@ -1,90 +1,103 @@

#include "configuration.hpp"

#include "plugin_specific_configuration.hpp"
#include <fmt/format.h>

#include <irods/rodsLog.h>
#include <irods/irods_log.hpp>

namespace irods {
namespace indexing {
configuration::configuration(
const std::string& _instance_name ) :
instance_name_{_instance_name} {
try {
auto cfg = get_plugin_specific_configuration(_instance_name);
auto capture_parameter = [&](const std::string& _param, std::string& _attr) {
if (const auto iter = cfg.find(_param); iter != cfg.end()) {
_attr = iter->get<std::string>();
}
}; // capture_parameter
#include <fmt/format.h>

namespace irods::indexing
{
configuration::configuration(const std::string& _instance_name)
: instance_name{_instance_name}
{
try {
auto cfg = get_plugin_specific_configuration(_instance_name);
auto capture_parameter = [&](const std::string& _param, std::string& _attr) {
if (const auto iter = cfg.find(_param); iter != cfg.end()) {
_attr = iter->get<std::string>();
}
}; // capture_parameter

// integer-or-string parameters
// integer-or-string parameters

using configuration_parameters::load;
using configuration_parameters::load;

job_limit = load<int>(cfg, "job_limit_per_collection_indexing_operation", 1000);
minimum_delay_time = load<int>(cfg, "minimum_delay_time", 1);
maximum_delay_time = load<int>(cfg, "maximum_delay_time", 30);
job_limit = load<int>(cfg, "job_limit_per_collection_indexing_operation", 1000);
minimum_delay_time = load<int>(cfg, "minimum_delay_time", 1);
maximum_delay_time = load<int>(cfg, "maximum_delay_time", 30);

// string parameters
// string parameters

capture_parameter("index", index);
capture_parameter("url_template", urlTemplate);
capture_parameter("delay_parameters", delay_parameters);
capture_parameter("collection_test_flag", collection_test_flag);
} catch ( const exception& _e ) {
THROW( KEY_NOT_FOUND, fmt::format("[{}:{}] - [{}] [error_code=[{}], instance_name=[{}]",
__func__, __LINE__, _e.client_display_what(), _e.code(), _instance_name));
} catch ( const nlohmann::json::exception& _e ) {
irods::log( LOG_ERROR,
fmt::format("[{}:{}] in [file={}] - json exception occurred [error={}], [instance_name={}]",
__func__,__LINE__,__FILE__, _e.what(), _instance_name));
THROW( SYS_LIBRARY_ERROR, _e.what() );
} catch ( const std::exception& _e ) {
THROW( SYS_INTERNAL_ERR,
fmt::format("[{}:{}] in [file={}] - general exception occurred [error={}], [instance_name={}]",
__func__,__LINE__,__FILE__, _e.what(), _instance_name));
} catch ( ... ) {
THROW( SYS_UNKNOWN_ERROR,
fmt::format( "[{}:{}] in [file={}], [instance_name={}]",__func__,__LINE__,__FILE__,_instance_name));
}
capture_parameter("index", index);
capture_parameter("delay_parameters", delay_parameters);
capture_parameter("collection_test_flag", collection_test_flag);
}
catch (const exception& _e) {
THROW(KEY_NOT_FOUND,
fmt::format("[{}:{}] - [{}] [error_code=[{}], instance_name=[{}]",
__func__,
__LINE__,
_e.client_display_what(),
_e.code(),
_instance_name));
}
catch (const nlohmann::json::exception& _e) {
THROW(SYS_LIBRARY_ERROR,
fmt::format("[{}:{}] in [file={}] - json exception occurred [error={}], [instance_name={}]",
__func__,
__LINE__,
__FILE__,
_e.what(),
_instance_name));
}
catch (const std::exception& _e) {
THROW(SYS_INTERNAL_ERR,
fmt::format("[{}:{}] in [file={}] - general exception occurred [error={}], [instance_name={}]",
__func__,
__LINE__,
__FILE__,
_e.what(),
_instance_name));
}
catch (...) {
THROW(
SYS_UNKNOWN_ERROR,
fmt::format("[{}:{}] in [file={}], [instance_name={}]", __func__, __LINE__, __FILE__, _instance_name));
}
} // ctor configuration

} // ctor configuration
namespace policy
{
    /// Builds a policy name of the form "<prefix>_<technology>".
    ///
    /// @param _prefix     Leading portion of the policy name.
    /// @param _technology Trailing portion, appended after a single underscore.
    /// @return The two inputs joined by one underscore.
    std::string compose_policy_name(const std::string& _prefix, const std::string& _technology)
    {
        auto policy_name = fmt::format("{}_{}", _prefix, _technology);
        return policy_name;
    }
} // namespace policy

namespace policy {
    /// Joins a policy prefix and a technology name with a single underscore.
    ///
    /// @param _prefix     Leading portion of the policy name.
    /// @param _technology Trailing portion, appended after the underscore.
    /// @return "<_prefix>_<_technology>".
    std::string compose_policy_name(
        const std::string& _prefix,
        const std::string& _technology)
    {
        std::string policy_name{_prefix};
        policy_name += "_";
        policy_name += _technology;
        return policy_name;
    }
} // namespace policy
std::string operation_and_index_types_to_policy_name(const std::string& _operation_type,
const std::string& _index_type)
{
if (operation_type::index == _operation_type) {
if (index_type::full_text == _index_type) {
return policy::object::index;
}

std::string operation_and_index_types_to_policy_name(
const std::string& _operation_type,
const std::string& _index_type) {
if(operation_type::index == _operation_type) {
if(index_type::full_text == _index_type) {
return policy::object::index;
}
else if(index_type::metadata == _index_type) {
return policy::metadata::index;
}
}
else if(operation_type::purge == _operation_type) {
if(index_type::full_text == _index_type) {
return policy::object::purge;
}
else if(index_type::metadata == _index_type) {
return policy::metadata::purge;
}
} // else
if (index_type::metadata == _index_type) {
return policy::metadata::index;
}
}
else if (operation_type::purge == _operation_type) {
if (index_type::full_text == _index_type) {
return policy::object::purge;
}

THROW(
SYS_INVALID_INPUT_PARAM,
boost::format("operation [%s], index [%s]")
% _operation_type
% _index_type);
} // operation_and_index_types_to_policy_name
} // namespace indexing
} // namespace irods
if (index_type::metadata == _index_type) {
return policy::metadata::purge;
}
}

THROW(SYS_INVALID_INPUT_PARAM, fmt::format("operation [{}], index [{}]", _operation_type, _index_type));
} // operation_and_index_types_to_policy_name
} // namespace irods::indexing
Loading
Loading