From 9d3b282649e952b395a38f82ef42f00afe39a755 Mon Sep 17 00:00:00 2001 From: Georges Berenger Date: Wed, 1 Nov 2023 15:12:03 -0700 Subject: [PATCH] Rework video decoder API to allow for more complex creation cases Summary: With HW decoders, we can find that different implementations of the same codec format have different pixel format support, which means that we can't always decode files we encoded with H.264 or H.265 with any implementation of H.264 or H.265. The only way to tell if a codec implementation can be used is to decode a frame and see if we can handle the pixel format of the frame produced by xprs (that is, ffmpeg SW or HW). This forces us to completely revamp the API, which is what this diff is about. Now, for a codec creation to be successful, we will: - have to find an implementation that handles the codec (H.264, H.265, etc.) - decode a first frame (must be a key frame) - be able to convert that image to the vrs pixel format that we expect (if we can convert the output's pixel format into a vrs::PixelFrame, then we can use the decoder). While this is a change in behavior, with SW codecs, this should not fundamentally change how codecs are handled, but it opens the door for HW codecs support coming next. 
Reviewed By: finik Differential Revision: D48496973 fbshipit-source-id: 224cac3b07f03918503e00be759583d2783c92de --- vrs/utils/DecoderFactory.cpp | 10 +++++++--- vrs/utils/DecoderFactory.h | 19 +++++++++++-------- vrs/utils/VideoFrameHandler.cpp | 21 ++++++++++++--------- vrs/utils/VideoFrameHandler.h | 1 + 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/vrs/utils/DecoderFactory.cpp b/vrs/utils/DecoderFactory.cpp index 73b02499..3a2ce026 100644 --- a/vrs/utils/DecoderFactory.cpp +++ b/vrs/utils/DecoderFactory.cpp @@ -23,6 +23,7 @@ #include #include +#include using namespace std; @@ -66,14 +67,17 @@ void DecoderFactory::registerDecoderMaker(DecoderMaker decoderMaker) { decoderMakers_.emplace_back(decoderMaker); } -unique_ptr DecoderFactory::makeDecoder(const string& codecName) { +unique_ptr DecoderFactory::makeDecoder( + const vector& encodedFrame, + void* outDecodedFrame, + const ImageContentBlockSpec& outputImageSpec) { for (const DecoderMaker& decoderMaker : decoderMakers_) { - unique_ptr decoder = decoderMaker(codecName); + unique_ptr decoder = decoderMaker(encodedFrame, outDecodedFrame, outputImageSpec); if (decoder) { return decoder; } } - XR_LOGW("Could not create a decoder for '{}'!", codecName); + XR_LOGW("Could not create a decoder for '{}'!", outputImageSpec.getCodecName()); return nullptr; } diff --git a/vrs/utils/DecoderFactory.h b/vrs/utils/DecoderFactory.h index 35d78960..69ac4aa0 100644 --- a/vrs/utils/DecoderFactory.h +++ b/vrs/utils/DecoderFactory.h @@ -43,15 +43,15 @@ class DecoderI { virtual ~DecoderI(); /// Decode compressed image to a frame virtual int decode( - RecordReader* reader, - const uint32_t sizeBytes, - void* outBuffer, - const ImageContentBlockSpec& inputImageSpec) = 0; - /// Decode compressed image, to update internal buffers - virtual int decode(RecordReader* reader, const uint32_t sizeBytes) = 0; + const vector& encodedFrame, + void* outDecodedFrame, + const ImageContentBlockSpec& outputImageSpec) = 0; }; -using 
DecoderMaker = std::function(const std::string& codecName)>; +using DecoderMaker = std::function( + const vector& encodedFrame, + void* outDecodedFrame, + const ImageContentBlockSpec& outputImageSpec)>; class DecoderFactory { public: @@ -59,7 +59,10 @@ class DecoderFactory { void registerDecoderMaker(DecoderMaker decoderMaker); - std::unique_ptr makeDecoder(const std::string& codecName); + std::unique_ptr makeDecoder( + const vector& encodedFrame, + void* outDecodedFrame, + const ImageContentBlockSpec& outputImageSpec); protected: DecoderFactory() = default; diff --git a/vrs/utils/VideoFrameHandler.cpp b/vrs/utils/VideoFrameHandler.cpp index 10863777..6739fe1c 100644 --- a/vrs/utils/VideoFrameHandler.cpp +++ b/vrs/utils/VideoFrameHandler.cpp @@ -21,6 +21,7 @@ #include #include +#include #include using namespace std; @@ -28,12 +29,13 @@ using namespace std; namespace vrs::utils { int VideoFrameHandler::tryToDecodeFrame( - void* outBuffer, + void* outDecodedFrame, RecordReader* reader, const ContentBlock& contentBlock) { + const ImageContentBlockSpec& spec = contentBlock.image(); isVideo_ = true; - requestedKeyFrameTimestamp_ = contentBlock.image().getKeyFrameTimestamp(); - requestedKeyFrameIndex_ = contentBlock.image().getKeyFrameIndex(); + requestedKeyFrameTimestamp_ = spec.getKeyFrameTimestamp(); + requestedKeyFrameIndex_ = spec.getKeyFrameIndex(); videoGoodState_ = requestedKeyFrameIndex_ == 0 || (requestedKeyFrameTimestamp_ == decodedKeyFrameTimestamp_ && requestedKeyFrameIndex_ == decodedKeyFrameIndex_ + 1); @@ -41,13 +43,14 @@ int VideoFrameHandler::tryToDecodeFrame( decodedKeyFrameTimestamp_ = requestedKeyFrameTimestamp_; decodedKeyFrameIndex_ = requestedKeyFrameIndex_; // XR_LOGI("Reading frame {}/{}", requestedKeyFrameTimestamp_, requestedKeyFrameIndex_); - if (!decoder_) { - decoder_ = DecoderFactory::get().makeDecoder(contentBlock.image().getCodecName()); - if (!decoder_) { - return domainError(DecodeStatus::CodecNotFound); - } + 
encodedFrame_.resize(contentBlock.getBlockSize()); + IF_ERROR_LOG_AND_RETURN(reader->read(encodedFrame_)); + if (decoder_) { + return decoder_->decode(encodedFrame_, outDecodedFrame, contentBlock.image()); } - return decoder_->decode(reader, contentBlock.getBlockSize(), outBuffer, contentBlock.image()); + decoder_ = + DecoderFactory::get().makeDecoder(encodedFrame_, outDecodedFrame, contentBlock.image()); + return decoder_ ? SUCCESS : domainError(DecodeStatus::CodecNotFound); } if (requestedKeyFrameTimestamp_ == decodedKeyFrameTimestamp_) { XR_LOGW( diff --git a/vrs/utils/VideoFrameHandler.h b/vrs/utils/VideoFrameHandler.h index 21a96f78..e191e110 100644 --- a/vrs/utils/VideoFrameHandler.h +++ b/vrs/utils/VideoFrameHandler.h @@ -99,6 +99,7 @@ class VideoFrameHandler { private: std::unique_ptr decoder_; + std::vector encodedFrame_; double decodedKeyFrameTimestamp_{}; uint32_t decodedKeyFrameIndex_{kInvalidFrameIndex};