New tool to read an image from an image segment [POC]

Summary: C++ API and an experimental tool to read an image directly from a file using image location information, including a sub-segment of a compressed block. The API lets you control whether the image is merely read or decoded, and optionally normalized to grey8/grey16/rgb. A version of the API uses an open FileHandler, useful if you're reading multiple images from the same file. Another interprets an already-read memory block, in case you've been reading different file segments from the cloud using some other API. Differential Revision: D64639367 fbshipit-source-id: 1a527691388aad75b24ad06565bdf39c28929a44
facebookresearch · Oct 24, 2024 · 0a3351b · 0a3351b
1 parent 5ff5ef5
commit 0a3351b
Show file tree

Hide file tree

Showing 4 changed files with 260 additions and 1 deletion.
diff --git a/vrs/utils/BufferRecordReader.hpp b/vrs/utils/BufferRecordReader.hpp
@@ -28,6 +28,9 @@ namespace vrs::utils {
 class BufferFileHandler : public FileHandler {
  public:
   BufferFileHandler() : fileHandlerName_{"BufferFileHandler"} {}
+  BufferFileHandler(const void* data, size_t length) : fileHandlerName_{"BufferFileHandler"} {
+    init(data, length); // hand the raw block to init(); presumably no copy is made -- TODO confirm
+  }
   template <class T>
   explicit BufferFileHandler(const vector<T>& buffer) {
     init(buffer);

diff --git a/vrs/utils/ImageLoader.cpp b/vrs/utils/ImageLoader.cpp
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define DEFAULT_LOG_CHANNEL "ImageLoader"
+#include <logging/Log.h>
+
+#include <vrs/Compressor.h>
+#include <vrs/RecordReaders.h>
+#include <vrs/os/Utils.h>
+#include <vrs/utils/BufferRecordReader.hpp>
+#include <vrs/utils/PixelFrame.h>
+#include <vrs/utils/VideoRecordFormatStreamPlayer.h>
+
+#include "ImageLoader.h"
+
+using namespace std;
+
+namespace vrs::utils {
+
+// Buffer variant: exposes the memory block through a BufferFileHandler and defers to the
+// FileHandler overload. The block is read from its first byte, so the reference's dataOffset
+// is replaced by 0 in the copy handed over.
+bool loadImage(
+    const void* data,
+    size_t length,
+    PixelFrame& outFrame,
+    const DirectImageReference& imageRef,
+    ImageLoadType loadType) {
+  DirectImageReference bufferRef(imageRef);
+  bufferRef.dataOffset = 0;
+  BufferFileHandler bufferFile(data, length);
+  return loadImage(bufferFile, outFrame, bufferRef, loadType);
+}
+
+// File variant: reads (and optionally decodes/normalizes) the image described by imageRef
+// directly from an open file.
+// - file: open file handler; its read position is moved to imageRef.dataOffset.
+// - outFrame: receives the image on success.
+// - imageRef: location, size, format and compression details of the image data.
+// - loadType: Raw reads the bytes as stored; other values decode, and maybe normalize.
+// Returns true on success, false (after logging an error) on any validation or read failure.
+bool loadImage(
+    FileHandler& file,
+    PixelFrame& outFrame,
+    const DirectImageReference& imageRef,
+    ImageLoadType loadType) {
+  // The referenced byte range must fit entirely inside the file.
+  int64_t fileSize = file.getTotalSize();
+  if (imageRef.dataOffset < 0 || imageRef.dataOffset >= fileSize) {
+    XR_LOGE("Invalid location offset: {} (file size: {})", imageRef.dataOffset, fileSize);
+    return false;
+  }
+  if (imageRef.dataOffset + imageRef.dataSize > fileSize) {
+    XR_LOGE("Invalid location length: {} (file size: {})", imageRef.dataSize, fileSize);
+    return false;
+  }
+
+  if (imageRef.compressionType != CompressionType::None) {
+    if (imageRef.compressedOffset >= imageRef.dataSize) {
+      XR_LOGE(
+          "Invalid compressed offset: {} (data size: {})",
+          imageRef.compressedOffset,
+          imageRef.dataSize);
+      return false;
+    }
+  }
+
+  // A failed seek would make every following read return unrelated bytes: stop here.
+  if (file.setPos(imageRef.dataOffset) != 0) {
+    XR_LOGE("Failed to seek to image data at offset {}", imageRef.dataOffset);
+    return false;
+  }
+  uint32_t uncompressedDataSize = 0;
+  RecordReader* reader = nullptr;
+  UncompressedRecordReader uncompressedRecordReader;
+  CompressedRecordReader compressedRecordReader;
+  switch (imageRef.compressionType) {
+    case CompressionType::None:
+      uncompressedDataSize = imageRef.dataSize;
+      reader = uncompressedRecordReader.init(file, imageRef.dataSize, imageRef.dataSize);
+      break;
+    case CompressionType::Lz4:
+    case CompressionType::Zstd: {
+      uncompressedDataSize = imageRef.compressedLength;
+      reader = compressedRecordReader.init(
+          file, imageRef.dataSize, imageRef.compressedOffset + imageRef.compressedLength);
+      compressedRecordReader.initCompressionType(imageRef.compressionType);
+      if (imageRef.compressedOffset > 0) {
+        // The image starts compressedOffset bytes into the uncompressed stream:
+        // read and discard the bytes that come before it.
+        uint32_t readSize = imageRef.compressedOffset;
+        vector<uint8_t> compressedBuffer(readSize);
+        DataReference dataReference(compressedBuffer);
+        if (compressedRecordReader.read(dataReference, readSize) != 0) {
+          XR_LOGE("Failed to read compressed offset data");
+          return false;
+        }
+      }
+    } break;
+    default:
+      XR_LOGE("Can't interpret compressed data.");
+      return false;
+  }
+
+  // When the image format implies a known size, it must match the amount of data referenced.
+  ImageContentBlockSpec spec(imageRef.imageFormat);
+  size_t imageSpecSize = spec.getRawImageSize();
+  if (imageSpecSize != ContentBlock::kSizeUnknown && imageSpecSize != uncompressedDataSize) {
+    XR_LOGE(
+        "Image size mismatch: {} => {} vs {}",
+        imageRef.imageFormat,
+        imageSpecSize,
+        uncompressedDataSize);
+    return false;
+  }
+
+  ContentBlock contentBlock(spec, uncompressedDataSize);
+  if (loadType == ImageLoadType::Raw) {
+    if (!outFrame.readDiskImageData(reader, contentBlock)) {
+      XR_LOGE("Failed to read image data");
+      return false;
+    }
+  } else {
+    auto frame = make_shared<PixelFrame>();
+    if (!frame->readFrame(reader, contentBlock)) {
+      XR_LOGE("Failed to read and decode image data");
+      return false;
+    }
+    if (loadType == ImageLoadType::Normalize8 || loadType == ImageLoadType::Normalize16) {
+      shared_ptr<PixelFrame> normalizedFrame;
+      PixelFrame::normalizeFrame(frame, normalizedFrame, loadType == ImageLoadType::Normalize16);
+      // Keep the decoded frame if no normalized frame was produced, rather than
+      // dereferencing a null pointer below.
+      if (normalizedFrame) {
+        frame = std::move(normalizedFrame);
+      }
+    }
+    outFrame.init(frame->getSpec(), std::move(frame->getBuffer()));
+  }
+
+  return true;
+}
+
+} // namespace vrs::utils
diff --git a/vrs/utils/ImageLoader.h b/vrs/utils/ImageLoader.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+#include <memory>
+#include <utility>
+
+#include <vrs/ForwardDefinitions.h>
+#include <vrs/utils/ImageIndexer.h>
+#include <vrs/utils/PixelFrame.h>
+
+namespace vrs::utils {
+
+enum class ImageLoadType {
+  Raw, // load the bytes as-is, with minimal processing
+  Decode, // decode the image if it's compressed, returning a PixelFrame of type RAW
+  Normalize8, // decode the image if it's compressed, then normalize it to grey8 or rgb8
+  Normalize16, // decode the image if it's compressed, then normalize it to grey8, grey16, or rgb8
+};
+
+/// Load an image from a memory buffer that starts at the image's data.
+/// @param data: pointer to the data.
+/// @param length: length of the data, in bytes.
+/// @param outFrame: on exit, the image read.
+/// @param imageRef: an image reference. Note: dataOffset is ignored since the data was already read
+/// @param loadType: how to load the image. Defaults to ImageLoadType::Raw.
+/// @return True on success.
+bool loadImage(
+    const void* data,
+    size_t length,
+    PixelFrame& outFrame,
+    const DirectImageReference& imageRef,
+    ImageLoadType loadType = ImageLoadType::Raw);
+
+/// Load an image from a vector holding the image's data.
+/// Convenience wrapper for the pointer + length version: the length passed is the vector's
+/// size in bytes, whatever the element type.
+template <typename T>
+inline bool loadImage(
+    const vector<T>& data,
+    PixelFrame& outFrame,
+    const DirectImageReference& imageRef,
+    ImageLoadType loadType = ImageLoadType::Raw) {
+  const size_t byteCount = data.size() * sizeof(T);
+  return loadImage(data.data(), byteCount, outFrame, imageRef, loadType);
+}
+
+/// Load an image directly from an open file, without having to parse the file.
+/// @param file: an open file containing the data. Its read position is changed by the call.
+/// @param outFrame: on exit, the image read.
+/// @param imageRef: an image reference, telling where the image data is in the file.
+/// @param loadType: how to load the image. Defaults to ImageLoadType::Raw.
+/// @return True on success.
+bool loadImage(
+    FileHandler& file,
+    PixelFrame& outFrame,
+    const DirectImageReference& imageRef,
+    ImageLoadType loadType = ImageLoadType::Raw);
+
+} // namespace vrs::utils
diff --git a/vrs/utils/test/ImageIndexerLoaderTest.cpp b/vrs/utils/test/ImageIndexerLoaderTest.cpp
@@ -18,8 +18,11 @@
 
 #include <TestDataDir/TestDataDir.h>
 
+#include <vrs/FileHandlerFactory.h>
 #include <vrs/os/Utils.h>
 #include <vrs/utils/ImageIndexer.h>
+#include <vrs/utils/ImageLoader.h>
+#include <vrs/utils/xxhash/xxhash.h>
 
 using namespace std;
 using namespace vrs;
@@ -30,6 +33,43 @@ struct ImageIndexerLoaderTest : testing::Test {
   string kJpgFile = os::pathJoin(coretech::getTestDataDir(), "VRS_Files/jpg.vrs");
 };
 
+// Exercise the loadImage API on an open file: load the referenced image with the default
+// load type, then check its spec string and the XXH64 hash of its buffer.
+// Returns 0 when the image was loaded, 2 when loadImage failed.
+static int loadFrameFromFile(
+    FileHandler& file,
+    const DirectImageReference& image,
+    const string& format,
+    uint64_t hash) {
+  PixelFrame loadedFrame;
+  const bool loaded = loadImage(file, loadedFrame, image);
+  if (loaded) {
+    EXPECT_EQ(loadedFrame.getSpec().asString(), format);
+    XXH64Digester hasher;
+    hasher.ingest(loadedFrame.getBuffer());
+    EXPECT_EQ(hasher.digest(), hash);
+    return 0;
+  }
+  return 2;
+}
+
+// Test the loadImage API reading from a memory buffer: the image's bytes are first read from
+// the file into a vector, then the image is loaded from that vector with the default load type.
+// Returns 0 when the image was loaded, 1 when the bytes could not be read from the file,
+// 2 when loadImage failed.
+static int loadFrameFromMemory(
+    FileHandler& file,
+    const DirectImageReference& image,
+    const string& format,
+    uint64_t hash) {
+  vector<char> buffer(image.dataSize);
+  // Check the seek as well as the read, and stop early: loading and hashing a buffer that
+  // was never filled would only produce misleading follow-up failures.
+  int status = file.setPos(image.dataOffset);
+  if (status == 0) {
+    status = file.read(buffer.data(), buffer.size());
+  }
+  EXPECT_EQ(status, 0);
+  if (status != 0) {
+    return 1;
+  }
+  PixelFrame frame;
+  if (!loadImage(buffer, frame, image)) {
+    return 2;
+  }
+  EXPECT_EQ(frame.getSpec().asString(), format);
+  XXH64Digester digester;
+  digester.ingest(frame.getBuffer());
+  EXPECT_EQ(digester.digest(), hash);
+  return 0;
+}
+
 TEST_F(ImageIndexerLoaderTest, ImageIndexerLoaderTest) {
   vector<DirectImageReference> readImages;
 
@@ -40,11 +80,20 @@ TEST_F(ImageIndexerLoaderTest, ImageIndexerLoaderTest) {
       {2108199, 2106944, format, CompressionType::Zstd, 52, 3760128},
       {4215175, 2106022, format, CompressionType::Zstd, 52, 3760128},
   };
-  EXPECT_EQ(readImages, expectedImages);
+  ASSERT_EQ(readImages, expectedImages);
+  unique_ptr<FileHandler> file;
+  ASSERT_EQ(FileHandlerFactory::getInstance().delegateOpen(kRgbFile, file), 0);
+  EXPECT_EQ(loadFrameFromFile(*file, readImages[0], format, 4114475262886596638ULL), 0);
+  EXPECT_EQ(loadFrameFromFile(*file, readImages[1], format, 16026781315276957005ULL), 0);
+  EXPECT_EQ(loadFrameFromFile(*file, readImages[2], format, 8098506684566711634ULL), 0);
+  EXPECT_EQ(loadFrameFromMemory(*file, readImages[1], format, 16026781315276957005ULL), 0);
 
   ASSERT_EQ(indexImages(kJpgFile, readImages), 0);
   expectedImages = {
       {6046, 1985655, "jpg", CompressionType::None, 0, 0},
   };
   EXPECT_EQ(readImages, expectedImages);
+  ASSERT_EQ(FileHandlerFactory::getInstance().delegateOpen(kJpgFile, file), 0);
+  EXPECT_EQ(loadFrameFromFile(*file, readImages[0], "jpg", 10323177114171200117ULL), 0);
+  EXPECT_EQ(loadFrameFromMemory(*file, readImages[0], "jpg", 10323177114171200117ULL), 0);
 }