From 1fa438ab30927ee0650fe7f6d79704dfbb0a9412 Mon Sep 17 00:00:00 2001 From: Dan Dennedy Date: Thu, 15 Feb 2024 15:15:17 -0800 Subject: [PATCH] add Ambisonic metadata to Set Equirectangular --- src/spatialmedia/sa3d.cpp | 32 +++--- src/spatialmedia/sa3d.h | 8 +- src/spatialmedia/spatialmedia.cpp | 133 ++++++++++++++++++++++++- src/widgets/avformatproducerwidget.cpp | 3 +- 4 files changed, 157 insertions(+), 19 deletions(-) diff --git a/src/spatialmedia/sa3d.cpp b/src/spatialmedia/sa3d.cpp index 18dec07645..02e37e8e1d 100644 --- a/src/spatialmedia/sa3d.cpp +++ b/src/spatialmedia/sa3d.cpp @@ -1,6 +1,7 @@ /***************************************************************************** * * Copyright 2016 Varol Okan. All rights reserved. + * Copyright (c) 2024 Meltytech, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,17 +27,18 @@ #include #include #include +#include #include "constants.h" #include "sa3d.h" -SA3DBox::SA3DBox ( Box * ) +SA3DBox::SA3DBox () : Box ( ) { memcpy ( m_name, constants::TAG_SA3D, 4 ); m_iHeaderSize = 8; m_iPosition = 0; - m_iContentSize = -1; + m_iContentSize = 0; m_iVersion = 0; m_iAmbisonicType = 0; m_iAmbisonicOrder= 0; @@ -98,17 +100,18 @@ Box *SA3DBox::load ( std::fstream &fs, uint32_t iPos, uint32_t iEnd ) return pNewBox; } -Box *SA3DBox::create ( int32_t iNumChannels, AudioMetadata &amData ) +Box *SA3DBox::create (int32_t iNumChannels) { - (void) amData; // unused // audio_metadata: dictionary ('ambisonic_type': string, 'ambisonic_order': int), SA3DBox *pNewBox = new SA3DBox ( ); pNewBox->m_iHeaderSize = 8; memcpy ( pNewBox->m_name, constants::TAG_SA3D, 4 ); + pNewBox->m_iAmbisonicOrder = ::sqrt(iNumChannels) - 1; + pNewBox->m_iVersion = 0; // # uint8 pNewBox->m_iContentSize += 1; // # uint8 -// pNewBox->m_iAmbisonicType= pNewBox->m_AmbisonicTypes[amData["ambisonic_type"]]; +// pNewBox->m_iAmbisonicType= pNewBox->m_AmbisonicTypes[amData["ambisonic_type"]]; pNewBox->m_iContentSize += 1; // # uint8 // pNewBox->m_iAmbisonicOrder = amData["ambisonic_order"]; pNewBox->m_iContentSize += 4; // # uint32 @@ -119,11 +122,15 @@ Box *SA3DBox::create ( int32_t iNumChannels, AudioMetadata &amData ) pNewBox->m_iNumChannels = iNumChannels; pNewBox->m_iContentSize += 4; // # uint32 - std::vector map; // = amData["channel_map"]; - std::vector::iterator it = map.begin ( ); - while ( it != map.end ( ) ) { - pNewBox->m_ChannelMap.push_back ( *it++ ); - pNewBox->m_iContentSize += 4; + // std::vector map; // = amData["channel_map"]; + // std::vector::iterator it = map.begin ( ); + // while ( it != map.end ( ) ) { + // pNewBox->m_ChannelMap.push_back ( *it++ ); + // pNewBox->m_iContentSize += 4; + // } + for (uint32_t i = 0; i < iNumChannels; i++) { + pNewBox->m_ChannelMap.push_back(i); + pNewBox->m_iContentSize += 4; // # uint32 } return pNewBox; } @@ -132,16 +139,15 @@ void SA3DBox::save (std::fstream &fsIn, std::fstream &fsOut , int32_t) { (void) fsIn; // unused //char tmp, name[4]; - uint64_t iSize = m_iContentSize; if ( m_iHeaderSize == 16 ) { writeUint32 ( fsOut, 1 ); fsOut.write ( m_name, 4 ); - writeUint64 ( fsOut, iSize ); + writeUint64 ( fsOut, size() ); //fsOut.write ( name, 4 ); I think this is a bug in the original code here. } else if ( m_iHeaderSize == 8 ) { - writeUint32 ( fsOut, m_iContentSize ); + writeUint32 ( fsOut, size() ); fsOut.write ( m_name, 4 ); } diff --git a/src/spatialmedia/sa3d.h b/src/spatialmedia/sa3d.h index 973f788107..2cf1933921 100644 --- a/src/spatialmedia/sa3d.h +++ b/src/spatialmedia/sa3d.h @@ -2,7 +2,8 @@ /***************************************************************************** * * Copyright 2016 Varol Okan. All rights reserved. - * + * Copyright (c) 2024 Meltytech, LLC + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -33,17 +34,16 @@ class SA3DBox : public Box { - SA3DBox ( ) { }; public: enum ePosition { None }; - SA3DBox ( Box * ); + SA3DBox (); virtual ~SA3DBox ( ); // Loads the SA3D box located at position pos in a mp4 file. static Box *load ( std::fstream &fs, uint32_t iPos, uint32_t iEnd ); - static Box *create ( int32_t iNumChannels, AudioMetadata & ); + static Box *create ( int32_t iNumChannels ); virtual void save ( std::fstream &fsIn, std::fstream &fsOut, int32_t ); const char *ambisonic_type_name ( ); diff --git a/src/spatialmedia/spatialmedia.cpp b/src/spatialmedia/spatialmedia.cpp index 9688aa5037..db16895fbb 100644 --- a/src/spatialmedia/spatialmedia.cpp +++ b/src/spatialmedia/spatialmedia.cpp @@ -1,7 +1,7 @@ /***************************************************************************** * * Copyright 2016 Varol Okan. All rights reserved. - * Copyright (c) 2020 Meltytech, LLC + * Copyright (c) 2020-2024 Meltytech, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include "spatialmedia.h" #include "mpeg4_container.h" +#include "sa3d.h" #include #include @@ -77,6 +78,132 @@ static Box *spherical_uuid ( std::string &strMetadata ) return p; } +static int get_descriptor_length(std::fstream &inFile) +{ + auto result = 0; + uint8_t size_byte; + + for (int i = 0; i < 4; i++) { + inFile.read((char*) &size_byte, 1); + result = (result << 7) | (size_byte & 0x7f); + if (size_byte != 0x80) break; + } + return result; +} + +static int get_aac_num_channels(Box *mp4aBox, std::fstream &inFile) +{ + auto result = -1; + auto size = sizeof(mp4aBox->m_name); + auto pos = inFile.tellg(); + + for (auto box : static_cast(mp4aBox)->m_listContents) { + if (!memcmp(constants::TAG_WAVE, box->m_name, size)) { + // Handle .mov with AAC audio: stsd -> mp4a -> wave -> esds + return get_aac_num_channels(box, inFile); + } else if (!memcmp(constants::TAG_ESDS, box->m_name, size)) { + // Read the AAC AudioSpecificConfig + char data[2]; + inFile.seekg(box->content_start() + 4); + // Verify the read descriptor is an elementary stream descriptor + inFile.read(data, 1); + if (data[0] != 3) break; + // Verify the read descriptor is a decoder config. descriptor + auto length = get_descriptor_length(inFile); + inFile.seekg(3, std::ios_base::cur); + inFile.read(data, 1); + if (data[0] != 4) break; + // Verify the read descriptor is a decoder specific info descriptor + length = get_descriptor_length(inFile); + inFile.seekg(13, std::ios_base::cur); // offset to the decoder specific config descriptor + inFile.read(data, 1); + if (data[0] != 5) break; + auto audio_specific_descriptor_size = get_descriptor_length(inFile); + if (audio_specific_descriptor_size < 2) break; + inFile.read(data, 2); + auto object_type = (data[0] >> 3) & 0x1f; + if (object_type != 2) break; + auto sampling_frequency_index = ((data[0] & 0x07) << 1 | (data[1] >> 7) & 0x01); + // TODO: If the sample rate is 96kHz an additional 24 bit offset + // value here specifies the actual sample rate. + if (sampling_frequency_index == 0) break; + result = (data[1] >> 3) & 0x0f; + } + } + inFile.seekg(pos); + return result; +} + +static bool sound_samples_contains(const char *name) +{ + auto nameSize = sizeof(Box::m_name); + auto size = sizeof(constants::SOUND_SAMPLE_DESCRIPTIONS) / nameSize; + for (int i = 0; i < size; i++) { + if (!memcmp(name, constants::SOUND_SAMPLE_DESCRIPTIONS[i], nameSize)) + return true; + } + return false; +} + +static int get_sample_description_num_channels(Box *ssdBox, std::fstream &inFile) +{ + auto result = -1; + auto size = sizeof(ssdBox->m_name); + auto pos = inFile.tellg(); + char data[4]; + + // Read the AAC AudioSpecificConfig + inFile.seekg(ssdBox->content_start() + 8); + inFile.read(data, 2); + auto version = (data[0] << 8) | data[1]; + inFile.seekg(2 + 4, std::ios_base::cur); // revision_level and vendor + switch (version) { + case 0: + case 1: + inFile.read(data, 2); + result = (data[0] << 8) | data[1]; + break; + case 2: + inFile.seekg(24, std::ios_base::cur); + inFile.read(data, 4); + result = 0; + for (int i = 0; i < 4; i++) + result = (result << 8) | data[i]; + break; + } + inFile.seekg(pos); + return result; +} + +static void mpeg4_add_spatial_audio(Box *mdiaBox, std::fstream &inFile) +{ + auto size = sizeof(mdiaBox->m_name); + for (auto box : static_cast(mdiaBox)->m_listContents) { + if (!memcmp(constants::TAG_MINF, box->m_name, size)) { + for (auto box : static_cast(box)->m_listContents) { + if (!memcmp(constants::TAG_STBL, box->m_name, size)) { + for (auto box : static_cast(box)->m_listContents) { + if (!memcmp(constants::TAG_STSD, box->m_name, size)) { + for (auto box : static_cast(box)->m_listContents) { + auto channels = 0; + if (!memcmp(constants::TAG_MP4A, box->m_name, size)) { + channels = get_aac_num_channels(box, inFile); + } else if (sound_samples_contains(box->m_name)) { + channels = get_sample_description_num_channels(box, inFile); + } + if (4 == channels) { + static_cast(box)->add(SA3DBox::create(channels)); + break; + } + } + } + } + } + } + } + } +} + static bool mpeg4_add_spherical ( Mpeg4Container *pMPEG4, std::fstream &inFile, std::string &strMetadata ) { // Adds a spherical uuid box to an mpeg4 file for all video tracks. @@ -108,6 +235,10 @@ static bool mpeg4_add_spherical ( Mpeg4Container *pMPEG4, std::fstream &inFile, std::vector::iterator it3 = pSub->m_listContents.begin ( ); while ( it3 != pSub->m_listContents.end ( ) ) { Box *pMDIA = *it3++; + if ( memcmp ( pMDIA->m_name, constants::TAG_MINF, 4 ) == 0 ) { + mpeg4_add_spatial_audio(pSub, inFile); + continue; + } if ( memcmp ( pMDIA->m_name, constants::TAG_HDLR, 4 ) != 0 ) continue; diff --git a/src/widgets/avformatproducerwidget.cpp b/src/widgets/avformatproducerwidget.cpp index 3c4a6b9946..493404aa58 100644 --- a/src/widgets/avformatproducerwidget.cpp +++ b/src/widgets/avformatproducerwidget.cpp @@ -665,7 +665,8 @@ void AvformatProducerWidget::on_menuButton_clicked() menu.addAction(ui->actionFFmpegConvert); menu.addAction(ui->actionExtractSubclip); menu.addAction(ui->actionSetFileDate); - if (Util::GetFilenameFromProducer(producer()).toLower().endsWith(".mp4")) { + if (Util::GetFilenameFromProducer(producer()).toLower().endsWith(".mp4") + || Util::GetFilenameFromProducer(producer()).toLower().endsWith(".mov")) { menu.addAction(ui->actionSetEquirectangular); } menu.addAction(ui->actionFFmpegVideoQuality);