Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added tracking for the top level categories in each pack. #357

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,14 @@ input_paths:
"pack2": "proto"
"pack3": "xml"
expected_stdout: |
The following top level categories were found in more than one pack
mycategory
mycategory
Did not write due to duplicates in categories. If you want to bypass this, use '--allow-duplicates'
Did not write due to duplicates in categories.
This commonly occurs when attempting to read the same pack multiple times or when separate packs coincidentally have the same name.
Please remove one of the packs or edit the name of the packs' top level category before running the program again.
If you want to bypass this stop, use '--allow-duplicates'.
The following top level categories were found in more than one pack:
mycategory in files
pack/xml_file.xml
pack3/xml_file.xml
pack2/markers.bin
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
expected_stderr: |
expected_returncode: 0
27 changes: 18 additions & 9 deletions xml_converter/src/packaging_xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,35 @@ OptionalString parse_marker_categories(
int depth = 0) {
OptionalString name = {
"", // value
false, // error
false, // is_null
};
if (get_node_name(node) == "MarkerCategory") {
rapidxml::xml_attribute<>* name_attribute = find_attribute(node, "name");
if (name_attribute == 0) {
// TODO: This error should really be for the entire node not just the name
errors->push_back(new XMLNodeNameError("Category attribute 'name' is missing so it cannot be properly referenced", node));
name.error = true;

// TODO: Maybe fall back on display name slugification.
name.value = "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER);
name = {
"UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER),
false,
};
UNKNOWN_CATEGORY_COUNTER++;
}
else {
name.value = lowercase(get_attribute_value(name_attribute));
name = {
lowercase(get_attribute_value(name_attribute)),
false,
};
}

if (name.value == "") {
errors->push_back(new XMLNodeNameError("Category attribute 'name' is an empty string so it cannot be properly referenced", node));
name.error = true;
// TODO: Maybe fall back on display name slugification.
name.value = "UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER);
name = {
"UNKNOWN_CATEGORY_" + to_string(UNKNOWN_CATEGORY_COUNTER),
false,
};
UNKNOWN_CATEGORY_COUNTER++;
}

Expand All @@ -64,7 +71,6 @@ OptionalString parse_marker_categories(
category = &existing_category_search->second;
if (category->parent != parent) {
errors->push_back(new XMLNodeNameError("Category somehow has a different parent then it used to. This might be a bug in xml_converter", node));
name.error = true;
}
}

Expand Down Expand Up @@ -92,7 +98,10 @@ OptionalString parse_marker_categories(
}
else {
errors->push_back(new XMLNodeNameError("Unknown MarkerCategory Tag", node));
name.error = true;
name = {
"",
true,
};
return name;
}
}
Expand Down Expand Up @@ -221,7 +230,7 @@ set<string> parse_xml_file(string xml_filepath, const string marker_pack_root_di
for (rapidxml::xml_node<>* node = root_node->first_node(); node; node = node->next_sibling()) {
if (get_node_name(node) == "MarkerCategory") {
OptionalString name = parse_marker_categories(node, marker_categories, nullptr, &errors, &state);
if (name.error == false) {
if (name.is_null == false) {
category_names.insert(name.value);
}
}
Expand Down
13 changes: 0 additions & 13 deletions xml_converter/src/string_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,16 +315,3 @@ std::string long_to_hex_string(uint64_t number) {

return hex_string;
}

void combine_sets(
std::set<std::string>* set_a,
std::set<std::string>* set_b,
std::vector<std::string>* duplicates) {
for (string str : *set_a) {
if (auto search = set_b->find(str); search != set_b->end()) {
duplicates->push_back(str);
}
else
set_b->insert(str);
}
}
3 changes: 1 addition & 2 deletions xml_converter/src/string_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

struct OptionalString {
std::string value;
bool error;
bool is_null;
};

bool matches_any(std::string test, std::initializer_list<std::string> list);
Expand All @@ -30,4 +30,3 @@ bool has_suffix(std::string const& fullString, std::string const& ending);
std::string join_file_paths(const std::string& path_a, const std::string& path_b);

std::string long_to_hex_string(uint64_t number);
void combine_sets(std::set<std::string>* set_a, std::set<std::string>* set_b, std::vector<std::string>* duplicates);
80 changes: 58 additions & 22 deletions xml_converter/src/xml_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,48 +62,62 @@ vector<string> get_files_by_suffix(string directory, string suffix) {
return files;
}

set<string> read_taco_directory(
map<string, vector<string>> read_taco_directory(
string input_path,
map<string, Category>* marker_categories,
vector<Parseable*>* parsed_pois) {
set<string> top_level_categories;
map<string, vector<string>> category_to_filepath;
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
if (!filesystem::exists(input_path)) {
cout << "Error: " << input_path << " is not an existing directory or file" << endl;
}
else if (filesystem::is_directory(input_path)) {
string directory_name = filesystem::path(input_path).filename();
vector<string> xml_files = get_files_by_suffix(input_path, ".xml");
for (const string& path : xml_files) {
set<string> category_names = (parse_xml_file(path, input_path, marker_categories, parsed_pois));
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
top_level_categories.insert(category_names.begin(), category_names.end());
string relative_path = directory_name + path.substr(input_path.size());
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(relative_path);
}
}
}
else if (filesystem::is_regular_file(input_path)) {
set<string> category_names = parse_xml_file(input_path, get_base_dir(input_path), marker_categories, parsed_pois);
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
top_level_categories.insert(category_names.begin(), category_names.end());
string filename = filesystem::path(input_path).filename();
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(filename);
}
}
return top_level_categories;
return category_to_filepath;
}

set<string> read_burrito_directory(
map<string, vector<string>> read_burrito_directory(
string input_path,
map<string, Category>* marker_categories,
vector<Parseable*>* parsed_pois) {
set<string> top_level_categories;
map<string, vector<string>> category_to_filepath;
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
if (!filesystem::exists(input_path)) {
cout << "Error: " << input_path << " is not an existing directory or file" << endl;
}
else if (filesystem::is_directory(input_path)) {
string directory_name = filesystem::path(input_path).filename();
vector<string> burrito_files = get_files_by_suffix(input_path, ".bin");
for (const string& path : burrito_files) {
set<string> category_names = read_protobuf_file(path, input_path, marker_categories, parsed_pois);
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
top_level_categories.insert(category_names.begin(), category_names.end());
string relative_path = directory_name + path.substr(input_path.size());
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(relative_path);
}
}
}
else if (filesystem::is_regular_file(input_path)) {
set<string> category_names = read_protobuf_file(input_path, get_base_dir(input_path), marker_categories, parsed_pois);
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
top_level_categories.insert(category_names.begin(), category_names.end());
string filename = filesystem::path(input_path).filename();
for (set<string>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_filepath[*it].push_back(filename);
}
}
return top_level_categories;
return category_to_filepath;
}

void write_taco_directory(
Expand Down Expand Up @@ -158,19 +172,22 @@ void process_data(
// All of the loaded pois and categories
vector<Parseable*> parsed_pois;
map<string, Category> marker_categories;
set<string> top_level_categories;
vector<string> duplicate_categories;
map<string, vector<vector<string>>> category_to_marker_pack_names;
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
map<string, vector<string>> duplicate_categories;
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved

// Read in all the xml taco markerpacks
auto begin = chrono::high_resolution_clock::now();
for (size_t i = 0; i < input_taco_paths.size(); i++) {
cout << "Loading taco pack " << input_taco_paths[i] << endl;

set<string> category_names = read_taco_directory(
// swap this to map <category, set<filenames>>)
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
map<string, vector<string>> category_names = read_taco_directory(
input_taco_paths[i],
&marker_categories,
&parsed_pois);
combine_sets(&category_names, &top_level_categories, &duplicate_categories);
for (map<string, vector<string>>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_marker_pack_names[it->first].push_back(it->second);
}
}
auto end = chrono::high_resolution_clock::now();
auto dur = end - begin;
Expand All @@ -181,18 +198,37 @@ void process_data(
for (size_t i = 0; i < input_guildpoint_paths.size(); i++) {
cout << "Loading guildpoint pack " << input_guildpoint_paths[i] << endl;

set<string> category_names = read_burrito_directory(
map<string, vector<string>> category_names = read_burrito_directory(
AsherGlick marked this conversation as resolved.
Show resolved Hide resolved
input_guildpoint_paths[i],
&marker_categories,
&parsed_pois);
combine_sets(&category_names, &top_level_categories, &duplicate_categories);
for (map<string, vector<string>>::iterator it = category_names.begin(); it != category_names.end(); it++) {
category_to_marker_pack_names[it->first].push_back(it->second);
}
}

if (duplicate_categories.size() > 0 && allow_duplicates != true) {
cout << "Did not write due to duplicates in categories. If you want to bypass this, use '--allow-duplicates'" << endl;
cout << "The following top level categories were found in more than one pack" << endl;
for (size_t i = 0; i < duplicate_categories.size(); i++) {
cout << duplicate_categories[i] << endl;
for (map<string, vector<vector<string>>>::iterator it = category_to_marker_pack_names.begin(); it != category_to_marker_pack_names.end(); it++) {
if (it->second.size() != 1) {
for (size_t i = 0; i < it->second.size(); i++) {
for (size_t j = 0; j < it->second[i].size(); j++) {
duplicate_categories[it->first].push_back(it->second[i][j]);
}
}
}
}
if (duplicate_categories.size() > 0 && allow_duplicates == false) {
cout << "Did not write due to duplicates in categories." << endl;
cout << "This commonly occurs when attempting to read the same pack multiple times or when separate packs coincidentally have the same name." << endl;
// TODO: This is the current advice. Further updates could allow other
// options like selective merges or changing category names to be unique
cout << "Please remove one of the packs or edit the name of the packs' top level category before running the program again." << endl;
cout << "If you want to bypass this stop, use '--allow-duplicates'." << endl;
cout << "The following top level categories were found in more than one pack:" << endl;
for (map<string, vector<string>>::iterator it = duplicate_categories.begin(); it != duplicate_categories.end(); it++) {
cout << it->first << " in files:" << endl;
for (string str : it->second) {
cout << str << endl;
}
}
return;
}
Expand Down Expand Up @@ -247,7 +283,7 @@ int main(int argc, char* argv[]) {
// converting from xml markerpacks to internal protobuf files.
vector<string> output_split_guildpoint_paths;

vector<string>* arg_target = &input_taco_paths;
vector<string>* arg_target = nullptr;

for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "--input-taco-path")) {
Expand Down
Loading