diff --git a/vrs/RecordFileReader.cpp b/vrs/RecordFileReader.cpp index 6067d677..04e5db01 100644 --- a/vrs/RecordFileReader.cpp +++ b/vrs/RecordFileReader.cpp @@ -486,6 +486,8 @@ int RecordFileReader::closeFile() { openProgressLogger_ = &defaultProgressLogger_; streamIndex_.clear(); streamRecordCounts_.clear(); + recordBoundaries_.clear(); + recordLimits_.clear(); lastRequest_.clear(); fileHasAnIndex_ = false; return result; @@ -496,41 +498,92 @@ int RecordFileReader::clearStreamPlayers() { return 0; } -const vector& RecordFileReader::getRecordBoundaries() const { - if (recordBoundaries_.empty()) { - // records are not always perfectly sorted, so we can't tell easily where they end. - // The best guess, is the offset of the first record, after the current record... - // yep, that's a bit expensive, but we have few options... - recordBoundaries_.reserve(recordIndex_.size() + 1); - int64_t lastOffset = 0; - bool sortNeeded = false; +void RecordFileReader::buildRecordBoundaries(bool boundariesAndLimits) const { + if (recordIndex_.empty() || (!recordBoundaries_.empty() && !recordLimits_.empty()) || + (!boundariesAndLimits && (!recordBoundaries_.empty() || !recordLimits_.empty()))) { + return; + } + int sortErrors = 0; + int64_t lastOffset = 0; + for (const auto& r : recordIndex_) { + if (r.fileOffset < lastOffset) { + sortErrors++; + } + lastOffset = r.fileOffset; + } + if (sortErrors == 0 && !boundariesAndLimits) { + // files are usually fully sorted, and we don't need much + recordLimits_[recordIndex_.size() - 1] = endOfUserRecordsOffset_; + } else { + vector boundaries; + boundaries.reserve(recordIndex_.size() + 1); for (const auto& r : recordIndex_) { - recordBoundaries_.emplace_back(r.fileOffset); - if (r.fileOffset < lastOffset) { - sortNeeded = true; + boundaries.emplace_back(r.fileOffset); + } + boundaries.emplace_back(endOfUserRecordsOffset_); + sort(boundaries.begin(), boundaries.end()); + + // the array of boundaries can save memory if we have too many 
errors and the map is big + // We'll have to do a binary search for each record limit we need... + bool tooManyErrors = sortErrors > recordIndex_.size() / 10; + + if (boundariesAndLimits || !tooManyErrors) { + recordLimits_.clear(); + auto nextBoundary = boundaries.end(); + for (size_t i = 0; i < recordIndex_.size(); ++i) { + if (nextBoundary == boundaries.end() || *nextBoundary != recordIndex_[i].fileOffset || + ++nextBoundary == boundaries.end() || i == recordIndex_.size() - 1 || + *nextBoundary != recordIndex_[i + 1].fileOffset) { + nextBoundary = + upper_bound(boundaries.begin(), boundaries.end(), recordIndex_[i].fileOffset); + if (!XR_VERIFY(nextBoundary != boundaries.end())) { + tooManyErrors = true; + recordLimits_.clear(); + break; + } + if (i + 1 >= recordIndex_.size() || recordIndex_[i + 1].fileOffset != *nextBoundary) { + recordLimits_[i] = *nextBoundary; + } + } } - lastOffset = r.fileOffset; } - recordBoundaries_.emplace_back(endOfUserRecordsOffset_); - if (sortNeeded) { - sort(recordBoundaries_.begin(), recordBoundaries_.end()); + if (boundariesAndLimits || tooManyErrors) { + recordBoundaries_ = std::move(boundaries); } } - return recordBoundaries_; } -uint32_t RecordFileReader::getRecordSize(uint32_t recordIndex) const { - if (recordIndex >= recordIndex_.size()) { +int64_t RecordFileReader::getFollowingRecordOffset(uint32_t recordIndex, bool useBoundaries) const { + if (!XR_VERIFY(recordIndex < recordIndex_.size())) { return 0; } - const IndexRecord::RecordInfo& record = recordIndex_[recordIndex]; - const vector& boundaries = getRecordBoundaries(); - auto nextBoundary = upper_bound(boundaries.begin(), boundaries.end(), record.fileOffset); - if (!XR_VERIFY(nextBoundary != boundaries.end()) || - !XR_VERIFY(*nextBoundary > record.fileOffset)) { + if (recordBoundaries_.empty() && recordLimits_.empty()) { + buildRecordBoundaries(false); + } + if (useBoundaries && !recordBoundaries_.empty()) { + auto nextBoundary = upper_bound( + recordBoundaries_.begin(), 
recordBoundaries_.end(), recordIndex_[recordIndex].fileOffset); + if (XR_VERIFY(nextBoundary != recordBoundaries_.end())) { + return *nextBoundary; + } + } + auto nextIter = recordLimits_.find(recordIndex); + if (nextIter != recordLimits_.end()) { + return nextIter->second; + } + return XR_VERIFY(recordIndex < recordIndex_.size() - 1) ? recordIndex_[recordIndex + 1].fileOffset + : endOfUserRecordsOffset_; +} + +uint32_t RecordFileReader::getRecordSize(uint32_t recordIndex, bool useBoundaries) const { + if (recordIndex >= recordIndex_.size()) { return 0; } - return *nextBoundary - record.fileOffset; + int64_t nextOffset = getFollowingRecordOffset(recordIndex, useBoundaries); + if (XR_VERIFY(nextOffset > recordIndex_[recordIndex].fileOffset)) { + return static_cast(nextOffset - recordIndex_[recordIndex].fileOffset); + } + return 0; } bool RecordFileReader::prefetchRecordSequence( diff --git a/vrs/RecordFileReader.h b/vrs/RecordFileReader.h index 4386185b..089a0e9d 100644 --- a/vrs/RecordFileReader.h +++ b/vrs/RecordFileReader.h @@ -349,7 +349,11 @@ class RecordFileReader { uint32_t getRecordStreamIndex(const IndexRecord::RecordInfo* record) const; /// Get a record's disk size. - uint32_t getRecordSize(uint32_t recordIndex) const; + /// @param recordIndex: index of the record. + /// @param useBoundaries: if true, use the record's boundaries when available to compute the size. + /// For testing only: use the default value! + /// @return The record's size on disk, or 0 for invalid indexes. + uint32_t getRecordSize(uint32_t recordIndex, bool useBoundaries = true) const; /// Timestamp for the first data record in the whole file. /// @return The timestamp for the file data record, or 0, if the file contains no data record.
@@ -554,6 +558,8 @@ class RecordFileReader { uint32_t totalCount() const; }; + void buildRecordBoundaries(bool boundariesAndLimits = false) const; ///< private, for testing only + private: int doOpenFile(const FileSpec& fileSpec, bool autoWriteFixedIndex, bool checkSignatureOnly); int readFileHeader(const FileSpec& fileSpec, FileFormat::FileHeader& outFileHeader); @@ -568,8 +574,9 @@ class RecordFileReader { static const string& getTag(const map& tags, const string& name); ///< private bool mightContainContentTypeInDataRecord(StreamId streamId, ContentType type) const; ///< private - /// Record boundaries, in sequential order, but not necessarily in record order! - const vector& getRecordBoundaries() const; ///< private + int64_t getFollowingRecordOffset(uint32_t recordIndex, bool useBoundaries) const; ///< private + mutable vector recordBoundaries_; ///< private + mutable map recordLimits_; ///< private // Members to read an open VRS file std::unique_ptr file_; @@ -591,7 +598,6 @@ class RecordFileReader { ProgressLogger* openProgressLogger_{&defaultProgressLogger_}; unique_ptr detailsSaveThread_; mutable map> streamIndex_; - mutable vector recordBoundaries_; // Location of the last record searched for a specific stream & record type // The pair: index of the record for the type (query), index of the record in the stream (result) mutable map, pair> lastRequest_; diff --git a/vrs/oss/test_data/VRS_Files/chunks-shuffled.vrs b/vrs/oss/test_data/VRS_Files/chunks-shuffled.vrs new file mode 100644 index 00000000..f8a4d636 Binary files /dev/null and b/vrs/oss/test_data/VRS_Files/chunks-shuffled.vrs differ diff --git a/vrs/test/GetRecordTest.cpp b/vrs/test/GetRecordTest.cpp index eeff802b..bb144fff 100644 --- a/vrs/test/GetRecordTest.cpp +++ b/vrs/test/GetRecordTest.cpp @@ -34,6 +34,8 @@ namespace { struct GetRecordTester : testing::Test { string kTestFile = os::pathJoin(coretech::getTestDataDir(), "VRS_Files/sample_file.vrs"); string kTestFile2 = 
os::pathJoin(coretech::getTestDataDir(), "VRS_Files/simulated.vrs"); + string kTestFile3 = os::pathJoin(coretech::getTestDataDir(), "VRS_Files/chunks.vrs"); + string kTestFile4 = os::pathJoin(coretech::getTestDataDir(), "VRS_Files/chunks-shuffled.vrs"); }; } // namespace @@ -374,3 +376,24 @@ TEST_F(GetRecordTester, GetRecordSizeTest) { {StreamId(RecordableTypeId::SlamImuData, 1), 1673048}}; EXPECT_EQ(streamSizes, actualSizes); } + +static size_t testGetRecordSize(const string& filename) { + vrs::RecordFileReader file; + EXPECT_EQ(file.openFile(filename), 0); + file.buildRecordBoundaries(true); + size_t totalSize = 0; + for (uint32_t i = 0; i < file.getRecordCount(); i++) { + uint32_t recordSize = file.getRecordSize(i, true); + totalSize += recordSize; + EXPECT_EQ(file.getRecordSize(i, false), recordSize); + } + EXPECT_LT(totalSize, file.getTotalSourceSize()); + return totalSize; +} + +TEST_F(GetRecordTester, GetRecordLimitsTest) { + EXPECT_EQ(testGetRecordSize(kTestFile), 79955); + EXPECT_EQ(testGetRecordSize(kTestFile2), 21302235); + EXPECT_EQ(testGetRecordSize(kTestFile3), 79606); + EXPECT_EQ(testGetRecordSize(kTestFile4), 79612); +}