Skip to content

Commit

Permalink
[c++] Fix memory leak in Arrow table implementation (#6314)
Browse files Browse the repository at this point in the history
Co-authored-by: James Lamb <[email protected]>
  • Loading branch information
borchero and jameslamb authored Feb 22, 2024
1 parent 894066d commit 8b61a15
Showing 1 changed file with 20 additions and 1 deletion.
21 changes: 20 additions & 1 deletion include/LightGBM/arrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@ class ArrowChunkedArray {
*/
class ArrowTable {
std::vector<ArrowChunkedArray> columns_;
const int64_t n_chunks_;
const ArrowArray* chunks_ptr_;
const ArrowSchema* schema_ptr_;

public:
/**
Expand All @@ -216,7 +219,8 @@ class ArrowTable {
* @param chunks A C-style array containing the chunks.
* @param schema The schema for all chunks.
*/
inline ArrowTable(int64_t n_chunks, const ArrowArray* chunks, const ArrowSchema* schema) {
inline ArrowTable(int64_t n_chunks, const ArrowArray *chunks, const ArrowSchema *schema)
: n_chunks_(n_chunks), chunks_ptr_(chunks), schema_ptr_(schema) {
columns_.reserve(schema->n_children);
for (int64_t j = 0; j < schema->n_children; ++j) {
std::vector<const ArrowArray*> children_chunks;
Expand All @@ -229,6 +233,21 @@ class ArrowTable {
}
}

~ArrowTable() {
  // Ownership note: the table acts as the consumer of the Arrow data it was constructed from,
  // so it is responsible for invoking the release callback of every top-level chunk and of the
  // schema. Child arrays are not released here; per the specification that is the producer's
  // job. See: https://arrow.apache.org/docs/format/CDataInterface.html#release-callback-semantics-for-consumers
  for (int64_t chunk_idx = 0; chunk_idx < n_chunks_; ++chunk_idx) {
    const ArrowArray& top_level_chunk = chunks_ptr_[chunk_idx];
    if (top_level_chunk.release == nullptr) {
      // No callback set on this chunk: nothing to invoke.
      continue;
    }
    // The callback takes a mutable pointer, while the table only stores const pointers.
    top_level_chunk.release(const_cast<ArrowArray*>(&top_level_chunk));
  }

  // The schema is released last, after all chunks have been handled.
  if (schema_ptr_->release != nullptr) {
    schema_ptr_->release(const_cast<ArrowSchema*>(schema_ptr_));
  }
}

/**
* @brief Get the number of rows in the table.
*
Expand Down

0 comments on commit 8b61a15

Please sign in to comment.