Skip to content

Commit

Permalink
Addressing code review. Changes to optional string class. Changed how…
Browse files Browse the repository at this point in the history
… conflicts are communicated
  • Loading branch information
klingbolt committed Nov 11, 2024
1 parent afef728 commit 43acbc4
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@ input_paths:
"pack2": "proto"
"pack3": "xml"
expected_stdout: |
The following top level categories were found in more than one pack
mycategory
mycategory
Did not write due to duplicates in categories. If you want to bypass this, use '--allow-duplicates'
Did not write due to duplicates in categories.
This commonly occurs when attempting to read the same pack multiple times or when separate packs coincidentally have the same name.
Please remove one of the packs or edit the name of the packs' top level category before running the program again.
If you want to bypass this stop, use '--allow-duplicates'.
The following top level categories were found in more than one pack:
mycategory in files
pack/xml_file.xml
pack3/xml_file.xml
pack2/markers.bin
expected_stderr: |
expected_returncode: 0
27 changes: 18 additions & 9 deletions xml_converter/src/packaging_xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,35 @@ OptionalString parse_marker_categories(
int depth = 0) {
OptionalString name = {
"", // value
false, // error
false, // is_null
};
if (get_node_name(node) == "MarkerCategory") {
rapidxml::xml_attribute<>* name_attribute = find_attribute(node, "name");
if (name_attribute == 0) {
// TODO: This error should really be for the entire node not just the name
errors->push_back(new XMLNodeNameError("Category attribute 'name' is missing so it cannot be properly referenced", node));
name.error = true;

// TODO: Maybe fall back on display name slugification.
name.value = "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER);
name = {
"UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER),
false,
};
UNKNOWN_CATEGORY_COUNTER++;
}
else {
name.value = lowercase(get_attribute_value(name_attribute));
name = {
lowercase(get_attribute_value(name_attribute)),
false,
};
}

if (name.value == "") {
errors->push_back(new XMLNodeNameError("Category attribute 'name' is an empty string so it cannot be properly referenced", node));
name.error = true;
// TODO: Maybe fall back on display name slugification.
name.value = "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER);
name = {
"UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER),
false,
};
UNKNOWN_CATEGORY_COUNTER++;
}

Expand All @@ -64,7 +71,6 @@ OptionalString parse_marker_categories(
category = &existing_category_search->second;
if (category->parent != parent) {
errors->push_back(new XMLNodeNameError("Category somehow has a different parent then it used to. This might be a bug in xml_converter", node));
name.error = true;
}
}

Expand Down Expand Up @@ -92,7 +98,10 @@ OptionalString parse_marker_categories(
}
else {
errors->push_back(new XMLNodeNameError("Unknown MarkerCategory Tag", node));
name.error = true;
name = {
"",
true,
};
return name;
}
}
Expand Down Expand Up @@ -221,7 +230,7 @@ set<string> parse_xml_file(string xml_filepath, const string marker_pack_root_di
for (rapidxml::xml_node<>* node = root_node->first_node(); node; node = node->next_sibling()) {
if (get_node_name(node) == "MarkerCategory") {
OptionalString name = parse_marker_categories(node, marker_categories, nullptr, &errors, &state);
if (name.error == false) {
if (name.is_null == false) {
category_names.insert(name.value);
}
}
Expand Down
13 changes: 0 additions & 13 deletions xml_converter/src/string_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,16 +315,3 @@ std::string long_to_hex_string(uint64_t number) {

return hex_string;
}

void combine_sets(
std::set<std::string>* set_a,
std::set<std::string>* set_b,
std::vector<std::string>* duplicates) {
for (string str : *set_a) {
if (auto search = set_b->find(str); search != set_b->end()) {
duplicates->push_back(str);
}
else
set_b->insert(str);
}
}
3 changes: 1 addition & 2 deletions xml_converter/src/string_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

struct OptionalString {
std::string value;
bool error;
bool is_null;
};

bool matches_any(std::string test, std::initializer_list<std::string> list);
Expand All @@ -30,4 +30,3 @@ bool has_suffix(std::string const& fullString, std::string const& ending);
std::string join_file_paths(const std::string& path_a, const std::string& path_b);

std::string long_to_hex_string(uint64_t number);
void combine_sets(std::set<std::string>* set_a, std::set<std::string>* set_b, std::vector<std::string>* duplicates);
80 changes: 58 additions & 22 deletions xml_converter/src/xml_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,48 +62,62 @@ vector<string> get_files_by_suffix(string directory, string suffix) {
return files;
}

set<string> read_taco_directory(
map<string, vector<string>> read_taco_directory(
string input_path,
map<string, Category>* marker_categories,
vector<Parseable*>* parsed_pois) {
set<string> top_level_categories;
map<string, vector<string>> category_to_filepath;
if (!filesystem::exists(input_path)) {
cout << "Error: " << input_path << " is not an existing directory or file" << endl;
}
else if (filesystem::is_directory(input_path)) {
string directory_name = filesystem::path(input_path).filename();
vector<string> xml_files = get_files_by_suffix(input_path, ".xml");
for (const string& path : xml_files) {
set<string> category_names = (parse_xml_file(path, input_path, marker_categories, parsed_pois));
top_level_categories.insert(category_names.begin(), category_names.end());
string relative_path = directory_name + path.substr(input_path.size());
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(relative_path);
}
}
}
else if (filesystem::is_regular_file(input_path)) {
set<string> category_names = parse_xml_file(input_path, get_base_dir(input_path), marker_categories, parsed_pois);
top_level_categories.insert(category_names.begin(), category_names.end());
string filename = filesystem::path(input_path).filename();
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(filename);
}
}
return top_level_categories;
return category_to_filepath;
}

set<string> read_burrito_directory(
map<string, vector<string>> read_burrito_directory(
string input_path,
map<string, Category>* marker_categories,
vector<Parseable*>* parsed_pois) {
set<string> top_level_categories;
map<string, vector<string>> category_to_filepath;
if (!filesystem::exists(input_path)) {
cout << "Error: " << input_path << " is not an existing directory or file" << endl;
}
else if (filesystem::is_directory(input_path)) {
string directory_name = filesystem::path(input_path).filename();
vector<string> burrito_files = get_files_by_suffix(input_path, ".bin");
for (const string& path : burrito_files) {
set<string> category_names = read_protobuf_file(path, input_path, marker_categories, parsed_pois);
top_level_categories.insert(category_names.begin(), category_names.end());
string relative_path = directory_name + path.substr(input_path.size());
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(relative_path);
}
}
}
else if (filesystem::is_regular_file(input_path)) {
set<string> category_names = read_protobuf_file(input_path, get_base_dir(input_path), marker_categories, parsed_pois);
top_level_categories.insert(category_names.begin(), category_names.end());
string filename = filesystem::path(input_path).filename();
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(filename);
}
}
return top_level_categories;
return category_to_filepath;
}

void write_taco_directory(
Expand Down Expand Up @@ -158,19 +172,22 @@ void process_data(
// All of the loaded pois and categories
vector<Parseable*> parsed_pois;
map<string, Category> marker_categories;
set<string> top_level_categories;
vector<string> duplicate_categories;
map<string, vector<vector<string>>> category_to_marker_pack_names;
map<string, vector<string>> duplicate_categories;

// Read in all the xml taco markerpacks
auto begin = chrono::high_resolution_clock::now();
for (size_t i = 0; i < input_taco_paths.size(); i++) {
cout << "Loading taco pack " << input_taco_paths[i] << endl;

set<string> category_names = read_taco_directory(
// swap this to map <category, set<filenames>>)
map<string, vector<string>> category_names = read_taco_directory(
input_taco_paths[i],
&marker_categories,
&parsed_pois);
combine_sets(&category_names, &top_level_categories, &duplicate_categories);
for (map<string, vector<string>>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_marker_pack_names[it->first].push_back(it->second);
}
}
auto end = chrono::high_resolution_clock::now();
auto dur = end - begin;
Expand All @@ -181,18 +198,37 @@ void process_data(
for (size_t i = 0; i < input_guildpoint_paths.size(); i++) {
cout << "Loading guildpoint pack " << input_guildpoint_paths[i] << endl;

set<string> category_names = read_burrito_directory(
map<string, vector<string>> category_names = read_burrito_directory(
input_guildpoint_paths[i],
&marker_categories,
&parsed_pois);
combine_sets(&category_names, &top_level_categories, &duplicate_categories);
for (map<string, vector<string>>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_marker_pack_names[it->first].push_back(it->second);
}
}

if (duplicate_categories.size() > 0 && allow_duplicates != true) {
cout << "Did not write due to duplicates in categories. If you want to bypass this, use '--allow-duplicates'" << endl;
cout << "The following top level categories were found in more than one pack" << endl;
for (size_t i = 0; i < duplicate_categories.size(); i++) {
cout << duplicate_categories[i] << endl;
for (map<string, vector<vector<string>>>::iterator it = category_to_marker_pack_names.begin(); it != category_to_marker_pack_names.end(); it++) {
if (it->second.size() != 1) {
for (size_t i = 0; i < it->second.size(); i++) {
for (size_t j = 0; j < it->second[i].size(); j++) {
duplicate_categories[it->first].push_back(it->second[i][j]);
}
}
}
}
if (duplicate_categories.size() > 0 && allow_duplicates == false) {
cout << "Did not write due to duplicates in categories." << endl;
cout << "This commonly occurs when attempting to read the same pack multiple times or when separate packs coincidentally have the same name." << endl;
// TODO: This is the current advice. Further updates could allow other
// options like selective merges or changing category names to be unique
cout << "Please remove one of the packs or edit the name of the packs' top level category before running the program again." << endl;
cout << "If you want to bypass this stop, use '--allow-duplicates'." << endl;
cout << "The following top level categories were found in more than one pack:" << endl;
for (map<string, vector<string>>::iterator it = duplicate_categories.begin(); it != duplicate_categories.end(); it++) {
cout << it->first << " in files:" << endl;
for (string str : it->second) {
cout << str << endl;
}
}
return;
}
Expand Down Expand Up @@ -247,7 +283,7 @@ int main(int argc, char* argv[]) {
// converting from xml markerpacks to internal protobuf files.
vector<string> output_split_guildpoint_paths;

vector<string>* arg_target = &input_taco_paths;
vector<string>* arg_target = nullptr;

for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "--input-taco-path")) {
Expand Down

0 comments on commit 43acbc4

Please sign in to comment.