Skip to content

Commit

Permalink
Simplify INI-style config reader using C++11 STL (#4478)
Browse files Browse the repository at this point in the history
* simplify the config.h file

* revise config.h

* revised config.h

* revise format

* revise format issues

* revise whitespace issues

* revise whitespace namespace format issues

* revise namespace format issues

* format issues

* format issues

* format issues

* format issues

* Revert submodule changes

* minor change

* Update src/common/config.h

Co-Authored-By: Philip Hyunsu Cho <[email protected]>

* address format issue from trivialfis

* Use correct cub submodule
  • Loading branch information
fuhaoda authored and hcho3 committed May 30, 2019
1 parent b48f895 commit dd60fc2
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 157 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,4 @@ List of Contributors
* [Jiaxiang Li](https://github.com/JiaxiangBU)
* [Bryan Woods](https://github.com/bryan-woods)
- Bryan added support for cross-validation for the ranking objective
* [Haoda Fu](https://github.com/fuhaoda)
7 changes: 2 additions & 5 deletions src/cli_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -341,13 +341,10 @@ int CLIRunTask(int argc, char *argv[]) {
}
rabit::Init(argc, argv);

std::vector<std::pair<std::string, std::string> > cfg;
common::ConfigParse cp(argv[1]);
auto cfg = cp.Parse();
cfg.emplace_back("seed", "0");

common::ConfigIterator itr(argv[1]);
while (itr.Next()) {
cfg.emplace_back(std::string(itr.Name()), std::string(itr.Val()));
}

for (int i = 2; i < argc; ++i) {
char name[256], val[256];
Expand Down
196 changes: 44 additions & 152 deletions src/common/config.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/*!
* Copyright 2014 by Contributors
* Copyright 2014-2019 by Contributors
* \file config.h
* \brief helper class to load configurations from a file
* \author Tianqi Chen
* \author Haoda Fu
*/
#ifndef XGBOOST_COMMON_CONFIG_H_
#define XGBOOST_COMMON_CONFIG_H_
Expand All @@ -12,181 +12,73 @@
#include <string>
#include <istream>
#include <fstream>
#include <vector>
#include <utility>

namespace xgboost {
namespace common {
/*!
* \brief base implementation of config reader
* \brief Implementation of config reader
*/
class ConfigReaderBase {
class ConfigParse {
public:
/*!
* \brief get current name, called after Next returns true
* \return current parameter name
*/
inline const char *Name() const {
  // Hand out the stored parameter name as a NUL-terminated C string.
  const std::string &current = s_name_;
  return current.c_str();
}
/*!
* \brief get current value, called after Next returns true
* \return current parameter value
*/
inline const char *Val() const {
  // Hand out the stored parameter value as a NUL-terminated C string.
  const std::string &current = s_val_;
  return current.c_str();
}
/*!
* \brief move iterator to next position
* \return true if there is value in next position
* \brief constructor
* \param cfgFileName name of the configuration file
*/
inline bool Next() {
while (!this->IsEnd()) {
GetNextToken(&s_name_);
if (s_name_ == "=") return false;
if (GetNextToken(&s_buf_) || s_buf_ != "=") return false;
if (GetNextToken(&s_val_) || s_val_ == "=") return false;
return true;
explicit ConfigParse(const std::string &cfgFileName) {
fi_.open(cfgFileName);
if (fi_.fail()) {
LOG(FATAL) << "cannot open file " << cfgFileName;
}
return false;
}
// called before usage
inline void Init() {
  // Load the first input character into ch_buf_ so tokenizing can start.
  const int first = this->GetChar();
  ch_buf_ = first;
}

protected:
/*!
* \brief to be implemented by subclass,
* get next token, return EOF if end of file
* \brief parse the configuration file
*/
virtual int GetChar() = 0;
/*! \brief to be implemented by child, check if end of stream */
virtual bool IsEnd() = 0;

private:
int ch_buf_;
std::string s_name_, s_val_, s_buf_;
std::vector<std::pair<std::string, std::string> > Parse() {
std::vector<std::pair<std::string, std::string> > results{};
char delimiter = '=';
char comment = '#';
std::string line{};
std::string name{};
std::string value{};

inline void SkipLine() {
  // Discard characters until a line terminator ('\n' or '\r') or EOF has been
  // read; that terminating character is what remains in ch_buf_.
  for (;;) {
    ch_buf_ = this->GetChar();
    if (ch_buf_ == EOF || ch_buf_ == '\n' || ch_buf_ == '\r') {
      break;
    }
  }
}
while (!fi_.eof()) {
std::getline(fi_, line); // read a line of configure file
line = line.substr(0, line.find(comment)); // anything beyond # is comment
size_t delimiterPos = line.find(delimiter); // find the = sign
name = line.substr(0, delimiterPos); // anything before = is the name
// after this = is the value
value = line.substr(delimiterPos + 1, line.length() - delimiterPos - 1);

// Appends the body of a double-quoted string to *tok, reading characters via
// GetChar() until the closing '"' is consumed.
// A raw '\r' / '\n' inside the string, or EOF before the closing quote, is
// reported through LOG(FATAL).
inline void ParseStr(std::string *tok) {
while ((ch_buf_ = this->GetChar()) != EOF) {
switch (ch_buf_) {
// Backslash: append the following character as-is; it is not stored in ch_buf_.
// NOTE(review): the result of GetChar() is not checked for EOF here — confirm intended.
case '\\': *tok += this->GetChar(); break;
// Closing quote: string complete.
case '\"': return;
case '\r':
// NOTE(review): no break after LOG(FATAL); if LOG(FATAL) ever returns, control
// falls through to default and the line break is appended. Presumably
// LOG(FATAL) does not return — confirm.
case '\n': LOG(FATAL)<< "ConfigReader: unterminated string";
default: *tok += static_cast<char>(ch_buf_);
}
}
// Reached only when EOF arrives before the closing quote.
LOG(FATAL) << "ConfigReader: unterminated string";
}
inline void ParseStrML(std::string *tok) {
  // Appends the body of a single-quoted string to *tok. Unlike the
  // double-quoted form, line breaks are accepted and kept in the token.
  for (;;) {
    ch_buf_ = this->GetChar();
    if (ch_buf_ == EOF) {
      break;  // ran out of input before the closing quote
    }
    if (ch_buf_ == '\'') {
      return;  // closing quote consumed, token complete
    }
    if (ch_buf_ == '\\') {
      // Backslash: append the following character as-is (not stored in ch_buf_).
      *tok += this->GetChar();
    } else {
      *tok += static_cast<char>(ch_buf_);
    }
  }
  LOG(FATAL) << "unterminated string";
}
// return newline
inline bool GetNextToken(std::string *tok) {
tok->clear();
bool new_line = false;
while (ch_buf_ != EOF) {
switch (ch_buf_) {
case '#' : SkipLine(); new_line = true; break;
case '\"':
if (tok->length() == 0) {
ParseStr(tok); ch_buf_ = this->GetChar(); return new_line;
} else {
LOG(FATAL) << "ConfigReader: token followed directly by string";
}
case '\'':
if (tok->length() == 0) {
ParseStrML(tok); ch_buf_ = this->GetChar(); return new_line;
} else {
LOG(FATAL) << "ConfigReader: token followed directly by string";
}
case '=':
if (tok->length() == 0) {
ch_buf_ = this->GetChar();
*tok = '=';
}
return new_line;
case '\r':
case '\n':
if (tok->length() == 0) new_line = true;
case '\t':
case ' ' :
ch_buf_ = this->GetChar();
if (tok->length() != 0) return new_line;
break;
default:
*tok += static_cast<char>(ch_buf_);
ch_buf_ = this->GetChar();
break;
}
}
if (tok->length() == 0) {
return true;
} else {
return false;
if (line.empty() || name.empty() || value.empty())
continue; // skip a line if # at beginning or there is no value or no name.
CleanString(&name); // clean the string
CleanString(&value);
results.emplace_back(name, value);
}
return results;
}
};
/*!
* \brief an iterator use stream base, allows use all types of istream
*/
class ConfigStreamReader: public ConfigReaderBase {
public:
/*!
* \brief constructor
* \param fin istream input stream
*/
explicit ConfigStreamReader(std::istream &fin) : fin_(fin) {}

protected:
int GetChar() override {
  // Read one character from the wrapped input stream.
  const int next = fin_.get();
  return next;
}
/*! \brief to be implemented by child, check if end of stream */
bool IsEnd() override {
return fin_.eof();
// The std::ifstream member closes its file in its own destructor, so an
// explicit close() call is unnecessary.
~ConfigParse() = default;

private:
std::istream &fin_;
};
std::ifstream fi_;
std::string allowableChar_ =
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-./\\";

/*!
* \brief an iterator that iterates over a configure file and gets the configures
*/
class ConfigIterator: public ConfigStreamReader {
public:
/*!
* \brief constructor
* \param fname name of configure file
* \brief remove unnecessary chars.
*/
explicit ConfigIterator(const char *fname) : ConfigStreamReader(fi_) {
fi_.open(fname);
if (fi_.fail()) {
LOG(FATAL) << "cannot open file " << fname;
}
ConfigReaderBase::Init();
void CleanString(std::string * str) {
  // Trim *str in place to the range between its first and last character that
  // appears in allowableChar_. Characters inside that range are kept even if
  // they are not in allowableChar_.
  const size_t firstIndx = str->find_first_of(allowableChar_);
  if (firstIndx == std::string::npos) {
    // No allowable character at all (e.g. whitespace only): substr(npos, ...)
    // would throw std::out_of_range, so reduce the string to empty instead.
    str->clear();
    return;
  }
  // A match exists, so lastIndx is valid and lastIndx >= firstIndx.
  const size_t lastIndx = str->find_last_of(allowableChar_);
  // Drop the tail first so that removing the head does not shift lastIndx.
  str->erase(lastIndx + 1);
  str->erase(0, firstIndx);
}
/*! \brief destructor */
// The std::ifstream member closes its file in its own destructor, so an
// explicit close() call is unnecessary.
~ConfigIterator() = default;

private:
std::ifstream fi_;
};
} // namespace common
} // namespace xgboost
Expand Down

0 comments on commit dd60fc2

Please sign in to comment.