Skip to content

Commit

Permalink
fix read for large files, should solve #30
Browse files Browse the repository at this point in the history
  • Loading branch information
DavZim committed Aug 25, 2024
1 parent f2cca80 commit f7d2485
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 19 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: RITCH
Type: Package
Title: R Parser for the ITCH-Protocol
Version: 0.1.26
Version: 0.1.27
Authors@R: c(
person("David", "Zimmermann-Kollenda", , "[email protected]", role = c("aut", "cre"))
)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# RITCH 0.1.76

* fix bug where no messages would be reported for larger files

# RITCH 0.1.26

* fix bug where gz functionality would write to user library or current directory
Expand Down
12 changes: 6 additions & 6 deletions debug/debug_tools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ void dbg_itch_file(std::string filename = "inst/extdata/ex20101224.TEST_ITCH_50"

Rprintf("Number of Messages:\n");
for (int j = 0; j < N_ACT_MSGS; j++) {
Rprintf("- '%c': %i\n", ACT_MSG_NAMES[j], counts[j]);
Rprintf("- '%c': %ld\n", ACT_MSG_NAMES[j], counts[j]);
}
Rprintf("=============================\n");
// Use the Buffer
Expand Down Expand Up @@ -154,7 +154,7 @@ void dbg_itch_file(std::string filename = "inst/extdata/ex20101224.TEST_ITCH_50"
}
}

Rprintf("'%c' (len 2 + %i) idx %4i at offset %5i (0x%04x) | ", num, l - 2, i, idx, idx);
Rprintf("'%c' (len 2 + %i) idx %4i at offset %5ld (0x%04lx) | ", num, l - 2, i, idx, idx);
Rprintf("(%02x %02x) ", bufferPtr[idx], bufferPtr[idx + 1]);
for (int x = 2; x < l; x++) Rprintf("%02x ", bufferPtr[idx + x]);
Rprintf("\n");
Expand Down Expand Up @@ -237,14 +237,14 @@ dbg_hex_to_dbl <- function(h, prec = 4) {
*/

// converts a std::string of hex values to a buffer
char * to_buffer(std::string x) {
unsigned char * to_buffer(std::string x) {
x.erase(remove_if(x.begin(), x.end(), isspace), x.end());
const uint64_t n_bytes = x.size() / 2;
unsigned char * buf;
// Rprintf("Found %u bytes\n", x.size() / 2);
buf = (unsigned char*) calloc(x.size() / 2, sizeof(unsigned char));

for (int j = 0; j < n_bytes; j++)
for (uint64_t j = 0; j < n_bytes; j++)
buf[j] = std::stoul(x.substr(j * 2, 2), nullptr, 16);
return buf;
}
Expand All @@ -263,7 +263,7 @@ Rcpp::DataFrame hex_count_messages_impl(std::string x) {
std::vector<int64_t> count = count_messages_buffer(buf, n_bytes);

Rcpp::StringVector types;
for (unsigned char c : ACT_MSG_NAMES) types.push_back(c);
for (unsigned char c : ACT_MSG_NAMES) types.push_back(std::string(1, c));

Rcpp::List df(2);
df.names() = Rcpp::CharacterVector::create("msg_type", "count");
Expand Down Expand Up @@ -322,7 +322,7 @@ Rcpp::DataFrame dbg_hex_to_df(std::string x, std::string msg_class) {
MessageParser mp(msg_class, 0, 100); // take max 100 messages...
mp.activate();
mp.init_vectors(n_messages + 100);
int64_t i = 2;
uint64_t i = 2;

while (i < n_bytes) {
mp.parse_message(&buf[i]);
Expand Down
15 changes: 11 additions & 4 deletions src/count_messages.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,16 @@ std::vector<int64_t> count_messages_internal(std::string filename,
}

// get size of the file
fseek(infile, 0L, SEEK_END);
int64_t filesize = ftell(infile);
fseek(infile, 0L, SEEK_SET);
if (fseeko64(infile, 0L, SEEK_END) != 0) {
Rcpp::stop("Error seeking to end of file");
}
int64_t filesize = ftello64(infile);
if (filesize == -1) {
Rcpp::stop("Error getting file size");
}
if (fseeko64(infile, 0L, SEEK_SET) != 0) {
Rcpp::stop("Error seeking back to start of file");
}

// create buffer
int64_t buf_size = max_buffer_size > filesize ? filesize : max_buffer_size;
Expand Down Expand Up @@ -42,7 +49,7 @@ std::vector<int64_t> count_messages_internal(std::string filename,

// align the file pointer to read in a full message again
const int64_t offset = i - this_buffer_size;
fseek(infile, offset, SEEK_CUR);
fseeko64(infile, offset, SEEK_CUR);
bytes_read += i;
}

Expand Down
15 changes: 11 additions & 4 deletions src/filter_itch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,16 @@ void filter_itch_impl(std::string infile, std::string outfile,
}

// get size of the file
fseek(ifile, 0L, SEEK_END);
int64_t filesize = ftell(ifile);
fseek(ifile, 0L, SEEK_SET);
if (fseeko64(ifile, 0L, SEEK_END) != 0) {
Rcpp::stop("Error seeking to end of file");
}
int64_t filesize = ftello64(ifile);
if (filesize == -1) {
Rcpp::stop("Error getting file size");
}
if (fseeko64(ifile, 0L, SEEK_SET) != 0) {
Rcpp::stop("Error seeking back to start of file");
}

// create buffer
int64_t buf_size = max_buffer_size > filesize ? filesize : max_buffer_size;
Expand Down Expand Up @@ -153,7 +160,7 @@ void filter_itch_impl(std::string infile, std::string outfile,
const int64_t offset = i - this_buffer_size;
// Rprintf("Filter ibuf at %6lld offsetting by %3lld - Total bytes read %lld\n",
// i, offset, bytes_read + i);
fseek(ifile, offset, SEEK_CUR);
fseeko64(ifile, offset, SEEK_CUR);
bytes_read += i;
}

Expand Down
15 changes: 11 additions & 4 deletions src/read_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,16 @@ Rcpp::List read_itch_impl(std::vector<std::string> classes,
}

// get size of the file
fseek(infile, 0L, SEEK_END);
int64_t filesize = ftell(infile);
fseek(infile, 0L, SEEK_SET);
if (fseeko64(infile, 0L, SEEK_END) != 0) {
Rcpp::stop("Error seeking to end of file");
}
int64_t filesize = ftello64(infile);
if (filesize == -1) {
Rcpp::stop("Error getting file size");
}
if (fseeko64(infile, 0L, SEEK_SET) != 0) {
Rcpp::stop("Error seeking back to start of file");
}

// create buffer
int64_t buf_size = max_buffer_size > filesize ? filesize : max_buffer_size;
Expand Down Expand Up @@ -161,7 +168,7 @@ Rcpp::List read_itch_impl(std::vector<std::string> classes,

// offset file pointer to fit the next message into the buffer
const int64_t offset = i - this_buffer_size;
fseek(infile, offset, SEEK_CUR);
fseeko64(infile, offset, SEEK_CUR);
bytes_read += i;
}

Expand Down

0 comments on commit f7d2485

Please sign in to comment.