From 43acbc4b4371b73b623170a415fe2c1eab992963 Mon Sep 17 00:00:00 2001 From: klingbolt Date: Sun, 10 Nov 2024 21:56:47 -0500 Subject: [PATCH] Addressing code review. Changes to optional string class. Changed how conflicts are communicated --- .../testcase.yaml | 13 ++- xml_converter/src/packaging_xml.cpp | 27 ++++--- xml_converter/src/string_helper.cpp | 13 --- xml_converter/src/string_helper.hpp | 3 +- xml_converter/src/xml_converter.cpp | 80 ++++++++++++++----- 5 files changed, 86 insertions(+), 50 deletions(-) diff --git a/xml_converter/integration_tests/test_cases/proto_and_xml_input_no_duplicates/testcase.yaml b/xml_converter/integration_tests/test_cases/proto_and_xml_input_no_duplicates/testcase.yaml index 19554081..d36081b1 100644 --- a/xml_converter/integration_tests/test_cases/proto_and_xml_input_no_duplicates/testcase.yaml +++ b/xml_converter/integration_tests/test_cases/proto_and_xml_input_no_duplicates/testcase.yaml @@ -3,9 +3,14 @@ input_paths: "pack2": "proto" "pack3": "xml" expected_stdout: | - The following top level categories were found in more than one pack - mycategory - mycategory - Did not write due to duplicates in categories. If you want to bypass this, use '--allow-duplicates' + Did not write due to duplicates in categories. + This commonly occurs when attempting to read the same pack multiple times or when separate packs coincidentally have the same name. + Please remove one of the packs or edit the name of the packs' top level category before running the program again. + If you want to bypass this stop, use '--allow-duplicates'. + The following top level categories were found in more than one pack: + mycategory in files + pack/xml_file.xml + pack3/xml_file.xml + pack2/markers.bin expected_stderr: | expected_returncode: 0 \ No newline at end of file diff --git a/xml_converter/src/packaging_xml.cpp b/xml_converter/src/packaging_xml.cpp index 604ded0e..6675c2d3 100644 --- a/xml_converter/src/packaging_xml.cpp +++ b/xml_converter/src/packaging_xml.cpp @@ -26,28 +26,35 @@ OptionalString parse_marker_categories( int depth = 0) { OptionalString name = { "", // value - false, // error + false, // is_null }; if (get_node_name(node) == "MarkerCategory") { rapidxml::xml_attribute<>* name_attribute = find_attribute(node, "name"); if (name_attribute == 0) { // TODO: This error should really be for the entire node not just the name errors->push_back(new XMLNodeNameError("Category attribute 'name' is missing so it cannot be properly referenced", node)); - name.error = true; // TODO: Maybe fall back on display name slugification. - name.value = "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER); + name = { + "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER), + false, + }; UNKNOWN_CATEGORY_COUNTER++; } else { - name.value = lowercase(get_attribute_value(name_attribute)); + name = { + lowercase(get_attribute_value(name_attribute)), + false, + }; } if (name.value == "") { errors->push_back(new XMLNodeNameError("Category attribute 'name' is an empty string so it cannot be properly referenced", node)); - name.error = true; // TODO: Maybe fall back on display name slugification. - name.value = "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER); + name = { + "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER), + false, + }; UNKNOWN_CATEGORY_COUNTER++; } @@ -64,7 +71,6 @@ OptionalString parse_marker_categories( category = &existing_category_search->second; if (category->parent != parent) { errors->push_back(new XMLNodeNameError("Category somehow has a different parent then it used to. This might be a bug in xml_converter", node)); - name.error = true; } } @@ -92,7 +98,10 @@ OptionalString parse_marker_categories( } else { errors->push_back(new XMLNodeNameError("Unknown MarkerCategory Tag", node)); - name.error = true; + name = { + "", + true, + }; return name; } } @@ -221,7 +230,7 @@ set parse_xml_file(string xml_filepath, const string marker_pack_root_di for (rapidxml::xml_node<>* node = root_node->first_node(); node; node = node->next_sibling()) { if (get_node_name(node) == "MarkerCategory") { OptionalString name = parse_marker_categories(node, marker_categories, nullptr, &errors, &state); - if (name.error == false) { + if (name.is_null == false) { category_names.insert(name.value); } } diff --git a/xml_converter/src/string_helper.cpp b/xml_converter/src/string_helper.cpp index ec461457..fc498b87 100644 --- a/xml_converter/src/string_helper.cpp +++ b/xml_converter/src/string_helper.cpp @@ -315,16 +315,3 @@ std::string long_to_hex_string(uint64_t number) { return hex_string; } - -void combine_sets( - std::set* set_a, - std::set* set_b, - std::vector* duplicates) { - for (string str : *set_a) { - if (auto search = set_b->find(str); search != set_b->end()) { - duplicates->push_back(str); - } - else - set_b->insert(str); - } -} diff --git a/xml_converter/src/string_helper.hpp b/xml_converter/src/string_helper.hpp index 79df0b27..81f4d529 100644 --- a/xml_converter/src/string_helper.hpp +++ b/xml_converter/src/string_helper.hpp @@ -8,7 +8,7 @@ struct OptionalString { std::string value; - bool error; + bool is_null; }; bool matches_any(std::string test, std::initializer_list list); @@ -30,4 +30,3 @@ bool has_suffix(std::string const& fullString, std::string const& ending); std::string join_file_paths(const std::string& path_a, const std::string& path_b); std::string long_to_hex_string(uint64_t number); -void combine_sets(std::set* set_a, std::set* set_b, std::vector* duplicates); diff --git a/xml_converter/src/xml_converter.cpp b/xml_converter/src/xml_converter.cpp index 948005c8..2392d950 100644 --- a/xml_converter/src/xml_converter.cpp +++ b/xml_converter/src/xml_converter.cpp @@ -62,48 +62,62 @@ vector get_files_by_suffix(string directory, string suffix) { return files; } -set read_taco_directory( +map> read_taco_directory( string input_path, map* marker_categories, vector* parsed_pois) { - set top_level_categories; + map> category_to_filepath; if (!filesystem::exists(input_path)) { cout << "Error: " << input_path << " is not an existing directory or file" << endl; } else if (filesystem::is_directory(input_path)) { + string directory_name = filesystem::path(input_path).filename(); vector xml_files = get_files_by_suffix(input_path, ".xml"); for (const string& path : xml_files) { set category_names = (parse_xml_file(path, input_path, marker_categories, parsed_pois)); - top_level_categories.insert(category_names.begin(), category_names.end()); + string relative_path = directory_name + path.substr(input_path.size()); + for (set::iterator it = category_names.begin(); it != category_names.end(); it++) { + category_to_filepath[*it].push_back(relative_path); + } } } else if (filesystem::is_regular_file(input_path)) { set category_names = parse_xml_file(input_path, get_base_dir(input_path), marker_categories, parsed_pois); - top_level_categories.insert(category_names.begin(), category_names.end()); + string filename = filesystem::path(input_path).filename(); + for (set::iterator it = category_names.begin(); it != category_names.end(); it++) { + category_to_filepath[*it].push_back(filename); + } } - return top_level_categories; + return category_to_filepath; } -set read_burrito_directory( +map> read_burrito_directory( string input_path, map* marker_categories, vector* parsed_pois) { - set top_level_categories; + map> category_to_filepath; if (!filesystem::exists(input_path)) { cout << "Error: " << input_path << " is not an existing directory or file" << endl; } else if (filesystem::is_directory(input_path)) { + string directory_name = filesystem::path(input_path).filename(); vector burrito_files = get_files_by_suffix(input_path, ".bin"); for (const string& path : burrito_files) { set category_names = read_protobuf_file(path, input_path, marker_categories, parsed_pois); - top_level_categories.insert(category_names.begin(), category_names.end()); + string relative_path = directory_name + path.substr(input_path.size()); + for (set::iterator it = category_names.begin(); it != category_names.end(); it++) { + category_to_filepath[*it].push_back(relative_path); + } } } else if (filesystem::is_regular_file(input_path)) { set category_names = read_protobuf_file(input_path, get_base_dir(input_path), marker_categories, parsed_pois); - top_level_categories.insert(category_names.begin(), category_names.end()); + string filename = filesystem::path(input_path).filename(); + for (set::iterator it = category_names.begin(); it != category_names.end(); it++) { + category_to_filepath[*it].push_back(filename); + } } - return top_level_categories; + return category_to_filepath; } void write_taco_directory( @@ -158,19 +172,22 @@ void process_data( // All of the loaded pois and categories vector parsed_pois; map marker_categories; - set top_level_categories; - vector duplicate_categories; + map>> category_to_marker_pack_names; + map> duplicate_categories; // Read in all the xml taco markerpacks auto begin = chrono::high_resolution_clock::now(); for (size_t i = 0; i < input_taco_paths.size(); i++) { cout << "Loading taco pack " << input_taco_paths[i] << endl; - set category_names = read_taco_directory( + // swap this to map >) + map> category_names = read_taco_directory( input_taco_paths[i], &marker_categories, &parsed_pois); - combine_sets(&category_names, &top_level_categories, &duplicate_categories); + for (map>::iterator it = category_names.begin(); it != category_names.end(); it++) { + category_to_marker_pack_names[it->first].push_back(it->second); + } } auto end = chrono::high_resolution_clock::now(); auto dur = end - begin; @@ -181,18 +198,37 @@ void process_data( for (size_t i = 0; i < input_guildpoint_paths.size(); i++) { cout << "Loading guildpoint pack " << input_guildpoint_paths[i] << endl; - set category_names = read_burrito_directory( + map> category_names = read_burrito_directory( input_guildpoint_paths[i], &marker_categories, &parsed_pois); - combine_sets(&category_names, &top_level_categories, &duplicate_categories); + for (map>::iterator it = category_names.begin(); it != category_names.end(); it++) { + category_to_marker_pack_names[it->first].push_back(it->second); + } } - if (duplicate_categories.size() > 0 && allow_duplicates != true) { - cout << "Did not write due to duplicates in categories. If you want to bypass this, use '--allow-duplicates'" << endl; - cout << "The following top level categories were found in more than one pack" << endl; - for (size_t i = 0; i < duplicate_categories.size(); i++) { - cout << duplicate_categories[i] << endl; + for (map>>::iterator it = category_to_marker_pack_names.begin(); it != category_to_marker_pack_names.end(); it++) { + if (it->second.size() != 1) { + for (size_t i = 0; i < it->second.size(); i++) { + for (size_t j = 0; j < it->second[i].size(); j++) { + duplicate_categories[it->first].push_back(it->second[i][j]); + } + } + } + } + if (duplicate_categories.size() > 0 && allow_duplicates == false) { + cout << "Did not write due to duplicates in categories." << endl; + cout << "This commonly occurs when attempting to read the same pack multiple times or when separate packs coincidentally have the same name." << endl; + // TODO: This is the current advice. Further updates could allow other + // options like selective merges or changing category names to be unique + cout << "Please remove one of the packs or edit the name of the packs' top level category before running the program again." << endl; + cout << "If you want to bypass this stop, use '--allow-duplicates'." << endl; + cout << "The following top level categories were found in more than one pack:" << endl; + for (map>::iterator it = duplicate_categories.begin(); it != duplicate_categories.end(); it++) { + cout << it->first << " in files:" << endl; + for (string str : it->second) { + cout << str << endl; + } } return; } @@ -247,7 +283,7 @@ int main(int argc, char* argv[]) { // converting from xml markerpacks to internal protobuf files. vector output_split_guildpoint_paths; - vector* arg_target = &input_taco_paths; + vector* arg_target = nullptr; for (int i = 1; i < argc; i++) { if (!strcmp(argv[i], "--input-taco-path")) {