diff --git a/av/filter/loudnorm.pxd b/av/filter/loudnorm.pxd
new file mode 100644
index 000000000..b08d3502f
--- /dev/null
+++ b/av/filter/loudnorm.pxd
@@ -0,0 +1,4 @@
+from av.audio.stream cimport AudioStream
+
+
+cpdef bytes stats(str loudnorm_args, AudioStream stream)
diff --git a/av/filter/loudnorm.pyi b/av/filter/loudnorm.pyi
new file mode 100644
index 000000000..c680f638d
--- /dev/null
+++ b/av/filter/loudnorm.pyi
@@ -0,0 +1,3 @@
+from av.audio.stream import AudioStream
+
+def stats(loudnorm_args: str, stream: AudioStream) -> bytes: ...
diff --git a/av/filter/loudnorm.pyx b/av/filter/loudnorm.pyx
new file mode 100644
index 000000000..662bbd230
--- /dev/null
+++ b/av/filter/loudnorm.pyx
@@ -0,0 +1,82 @@
+# av/filter/loudnorm.pyx
+
+cimport libav as lib
+from cpython.bytes cimport PyBytes_FromString
+from libc.stdlib cimport free
+
+from av.audio.codeccontext cimport AudioCodecContext
+from av.audio.stream cimport AudioStream
+from av.container.core cimport Container
+from av.stream cimport Stream
+
+
+cdef extern from "libavcodec/avcodec.h":
+    ctypedef struct AVCodecContext:
+        pass
+
+cdef extern from "libavformat/avformat.h":
+    ctypedef struct AVFormatContext:
+        pass
+
+cdef extern from "loudnorm_impl.h":
+    char* loudnorm_get_stats(
+        AVFormatContext* fmt_ctx,
+        int audio_stream_index,
+        const char* loudnorm_args
+    ) nogil
+
+
+cpdef bytes stats(str loudnorm_args, AudioStream stream):
+    """
+    Get loudnorm statistics for an audio stream.
+
+    The stream's container is consumed by this call: its format context is
+    handed to the C helper, which reads it to the end and closes it.
+
+    Args:
+        loudnorm_args (str): Arguments for the loudnorm filter (e.g. "i=-24.0:lra=7.0:tp=-2.0")
+        stream (AudioStream): Input audio stream to analyze
+
+    Returns:
+        bytes: JSON string containing the loudnorm statistics
+
+    Raises:
+        RuntimeError: If the container has no format context left (for
+            example after an earlier call to this function), or if no
+            statistics could be obtained.
+    """
+
+    if not loudnorm_args:
+        # Nothing to append to; a leading ":" would add an empty option.
+        loudnorm_args = "print_format=json"
+    elif "print_format=json" not in loudnorm_args:
+        loudnorm_args = loudnorm_args + ":print_format=json"
+
+    cdef Container container = stream.container
+    cdef AVFormatContext* format_ptr = container.ptr
+
+    # The C helper dereferences the pointer unconditionally, so a container
+    # that was already consumed must be rejected here rather than crash.
+    if format_ptr == NULL:
+        raise RuntimeError("Container has no open format context")
+
+    # Ownership moves to loudnorm_get_stats(), which closes the input itself.
+    container.ptr = NULL  # Prevent double-free
+
+    cdef int stream_index = stream.index
+    cdef bytes py_args = loudnorm_args.encode("utf-8")
+    cdef const char* c_args = py_args
+    cdef char* result
+
+    with nogil:
+        result = loudnorm_get_stats(format_ptr, stream_index, c_args)
+
+    if result == NULL:
+        raise RuntimeError("Failed to get loudnorm stats")
+
+    try:
+        py_result = result[:]  # Make a copy of the string
+    finally:
+        free(result)  # Free the C string even if the copy raised
+
+    return py_result
diff --git a/av/filter/loudnorm_impl.c b/av/filter/loudnorm_impl.c
new file mode 100644
index 000000000..e8d56ddba
--- /dev/null
+++ b/av/filter/loudnorm_impl.c
@@ -0,0 +1,287 @@
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavfilter/avfilter.h>
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+#include <libavutil/opt.h>
+#include <string.h>
+
+#ifdef _WIN32
+    #include <windows.h>
+#else
+    #include <pthread.h>
+#endif
+
+/*
+ * The captured JSON and its synchronization objects are process-wide, and
+ * loudnorm_get_stats() replaces the global libav log callback while it runs,
+ * so only one call may be in flight at a time.
+ */
+#ifdef _WIN32
+    static CRITICAL_SECTION json_mutex;
+    static CONDITION_VARIABLE json_cond;
+    static int mutex_initialized = 0;
+#else
+    static pthread_mutex_t json_mutex = PTHREAD_MUTEX_INITIALIZER;
+    static pthread_cond_t json_cond = PTHREAD_COND_INITIALIZER;
+#endif
+
+static char json_buffer[2048] = {0};
+static int json_captured = 0;
+
+// Acquire the lock guarding json_buffer and json_captured.
+static void json_lock(void) {
+    #ifdef _WIN32
+        EnterCriticalSection(&json_mutex);
+    #else
+        pthread_mutex_lock(&json_mutex);
+    #endif
+}
+
+// Release the lock guarding json_buffer and json_captured.
+static void json_unlock(void) {
+    #ifdef _WIN32
+        LeaveCriticalSection(&json_mutex);
+    #else
+        pthread_mutex_unlock(&json_mutex);
+    #endif
+}
+
+// Log callback: keep the text from the first "{" of any message that has one.
+static void logging_callback(void *ptr, int level, const char *fmt, va_list vl) {
+    char line[2048];
+    vsnprintf(line, sizeof(line), fmt, vl);
+
+    const char *json_start = strstr(line, "{");
+    if (json_start) {
+        json_lock();
+
+        strncpy(json_buffer, json_start, sizeof(json_buffer) - 1);
+        json_buffer[sizeof(json_buffer) - 1] = '\0';
+        json_captured = 1;
+
+        #ifdef _WIN32
+            WakeConditionVariable(&json_cond);
+        #else
+            pthread_cond_signal(&json_cond);
+        #endif
+        json_unlock();
+    }
+}
+
+// Discard every frame the sink can deliver right now.
+// Returns 0 when the sink is drained, or a negative AVERROR on failure.
+static int drain_sink(AVFilterContext *sink_ctx, AVFrame *filt_frame) {
+    for (;;) {
+        int ret = av_buffersink_get_frame(sink_ctx, filt_frame);
+        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) return 0;
+        if (ret < 0) return ret;
+        av_frame_unref(filt_frame);
+    }
+}
+
+/*
+ * Decode one audio stream through the loudnorm filter and return the JSON
+ * statistics captured from the log output.
+ *
+ * A non-NULL fmt_ctx is always closed before returning, on success and on
+ * failure. Returns a heap-allocated string the caller must free(), or NULL
+ * if the pipeline could not be set up or no statistics were captured.
+ */
+char* loudnorm_get_stats(
+    AVFormatContext* fmt_ctx,
+    int audio_stream_index,
+    const char* loudnorm_args
+) {
+    char* result = NULL;
+    AVFilterGraph *filter_graph = NULL;
+    AVFilterContext *src_ctx = NULL, *sink_ctx = NULL, *loudnorm_ctx = NULL;
+    const AVCodec *codec = NULL;
+    AVCodecContext *codec_ctx = NULL;
+    AVCodecParameters *codecpar = NULL;
+    AVPacket *packet = NULL;
+    AVFrame *frame = NULL;
+    AVFrame *filt_frame = NULL;
+    const char *sample_fmt_name = NULL;
+    char ch_layout_str[64];
+    char args[512];
+    int saved_log_level;
+    int expect_stats;
+    int ret;
+
+    if (!fmt_ctx) {
+        return NULL;
+    }
+
+    #ifdef _WIN32
+    // Initialize synchronization objects if needed
+    if (!mutex_initialized) {
+        InitializeCriticalSection(&json_mutex);
+        InitializeConditionVariable(&json_cond);
+        mutex_initialized = 1;
+    }
+    #endif
+
+    json_lock();
+    json_captured = 0;  // Reset the captured flag
+    memset(json_buffer, 0, sizeof(json_buffer));  // Clear the buffer
+    json_unlock();
+
+    saved_log_level = av_log_get_level();
+    av_log_set_callback(logging_callback);
+
+    // Everything below jumps to `end` on failure so that fmt_ctx, which the
+    // caller has handed over, is closed on every path.
+    if (audio_stream_index < 0 ||
+        (unsigned int)audio_stream_index >= fmt_ctx->nb_streams) {
+        goto end;
+    }
+
+    codecpar = fmt_ctx->streams[audio_stream_index]->codecpar;
+    codec = avcodec_find_decoder(codecpar->codec_id);
+    if (!codec) goto end;
+    codec_ctx = avcodec_alloc_context3(codec);
+    if (!codec_ctx) goto end;
+    if (avcodec_parameters_to_context(codec_ctx, codecpar) < 0) goto end;
+    if (avcodec_open2(codec_ctx, codec, NULL) < 0) goto end;
+
+    // av_get_sample_fmt_name() returns NULL for an unknown format, which
+    // must not reach the "%s" below.
+    sample_fmt_name = av_get_sample_fmt_name(codec_ctx->sample_fmt);
+    if (!sample_fmt_name) goto end;
+    if (av_channel_layout_describe(&codecpar->ch_layout, ch_layout_str,
+                                   sizeof(ch_layout_str)) < 0) {
+        goto end;
+    }
+
+    filter_graph = avfilter_graph_alloc();
+    if (!filter_graph) goto end;
+
+    snprintf(args, sizeof(args),
+        "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=%s",
+        fmt_ctx->streams[audio_stream_index]->time_base.num,
+        fmt_ctx->streams[audio_stream_index]->time_base.den,
+        codecpar->sample_rate,
+        sample_fmt_name,
+        ch_layout_str);
+
+    if (avfilter_graph_create_filter(&src_ctx, avfilter_get_by_name("abuffer"),
+            "src", args, NULL, filter_graph) < 0) goto end;
+    if (avfilter_graph_create_filter(&sink_ctx, avfilter_get_by_name("abuffersink"),
+            "sink", NULL, NULL, filter_graph) < 0) goto end;
+    if (avfilter_graph_create_filter(&loudnorm_ctx, avfilter_get_by_name("loudnorm"),
+            "loudnorm", loudnorm_args, NULL, filter_graph) < 0) goto end;
+
+    if (avfilter_link(src_ctx, 0, loudnorm_ctx, 0) < 0) goto end;
+    if (avfilter_link(loudnorm_ctx, 0, sink_ctx, 0) < 0) goto end;
+    if (avfilter_graph_config(filter_graph, NULL) < 0) goto end;
+
+    packet = av_packet_alloc();
+    frame = av_frame_alloc();
+    filt_frame = av_frame_alloc();
+    if (!packet || !frame || !filt_frame) goto end;
+
+    while (av_read_frame(fmt_ctx, packet) >= 0) {
+        if (packet->stream_index != audio_stream_index) {
+            av_packet_unref(packet);
+            continue;
+        }
+
+        ret = avcodec_send_packet(codec_ctx, packet);
+        av_packet_unref(packet);
+        if (ret < 0) {
+            continue;
+        }
+
+        // Keep receiving until the decoder asks for more input. The sink is
+        // drained through drain_sink() so its EAGAIN cannot end this loop early.
+        for (;;) {
+            ret = avcodec_receive_frame(codec_ctx, frame);
+            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) break;
+            if (ret < 0) goto end;
+
+            ret = av_buffersrc_add_frame_flags(src_ctx, frame, AV_BUFFERSRC_FLAG_KEEP_REF);
+            if (ret < 0) goto end;
+
+            if (drain_sink(sink_ctx, filt_frame) < 0) goto end;
+        }
+    }
+
+    // Flush decoder
+    avcodec_send_packet(codec_ctx, NULL);
+    while (avcodec_receive_frame(codec_ctx, frame) >= 0) {
+        av_buffersrc_add_frame(src_ctx, frame);
+    }
+
+    // Flush filter
+    av_buffersrc_add_frame(src_ctx, NULL);
+    while (av_buffersink_get_frame(sink_ctx, filt_frame) >= 0) {
+        av_frame_unref(filt_frame);
+    }
+
+    // Force stats print
+    if (loudnorm_ctx) {
+        av_log_set_level(AV_LOG_INFO);
+        av_opt_set(loudnorm_ctx, "print_format", "json", AV_OPT_SEARCH_CHILDREN);
+        av_opt_set(loudnorm_ctx, "measured_i", NULL, AV_OPT_SEARCH_CHILDREN);
+        av_opt_set(loudnorm_ctx, "measured_lra", NULL, AV_OPT_SEARCH_CHILDREN);
+        av_opt_set(loudnorm_ctx, "measured_tp", NULL, AV_OPT_SEARCH_CHILDREN);
+        av_opt_set(loudnorm_ctx, "measured_thresh", NULL, AV_OPT_SEARCH_CHILDREN);
+        avfilter_init_str(loudnorm_ctx, NULL);
+    }
+
+    avfilter_graph_request_oldest(filter_graph);
+
+end:
+    // Failures before the loudnorm filter exists cannot yield its statistics,
+    // so the 5 second wait is skipped for them. Read the pointer before the
+    // graph (and the filter with it) is freed.
+    expect_stats = (loudnorm_ctx != NULL);
+
+    avcodec_free_context(&codec_ctx);
+    avfilter_graph_free(&filter_graph);
+    avformat_close_input(&fmt_ctx);
+    av_frame_free(&filt_frame);
+    av_frame_free(&frame);
+    av_packet_free(&packet);
+    av_log_set_level(saved_log_level);  // Undo the AV_LOG_INFO override above
+
+    if (expect_stats) {
+    #ifdef _WIN32
+        EnterCriticalSection(&json_mutex);
+        while (!json_captured) {
+            if (!SleepConditionVariableCS(&json_cond, &json_mutex, 5000)) {  // 5 second timeout
+                fprintf(stderr, "Timeout waiting for JSON data\n");
+                break;
+            }
+        }
+        if (json_captured) {
+            result = _strdup(json_buffer);  // Use _strdup on Windows
+        }
+        LeaveCriticalSection(&json_mutex);
+    #else
+        struct timespec timeout;
+        clock_gettime(CLOCK_REALTIME, &timeout);
+        timeout.tv_sec += 5;  // 5 second timeout
+
+        pthread_mutex_lock(&json_mutex);
+        while (json_captured == 0) {
+            int wait_ret = pthread_cond_timedwait(&json_cond, &json_mutex, &timeout);
+            if (wait_ret == ETIMEDOUT) {
+                fprintf(stderr, "Timeout waiting for JSON data\n");
+            }
+            if (wait_ret != 0) {
+                break;  // Timed out or failed; either way stop waiting
+            }
+        }
+        if (json_captured) {
+            result = strdup(json_buffer);
+        }
+        pthread_mutex_unlock(&json_mutex);
+    #endif
+    }
+
+    // NOTE(review): libavutil offers no getter for the current log callback, so
+    // a callback installed by the host before this call is not restored here.
+    av_log_set_callback(av_log_default_callback);
+    return result;
+}
\ No newline at end of file
diff --git a/av/filter/loudnorm_impl.h b/av/filter/loudnorm_impl.h
new file mode 100644
index 000000000..7357e4668
--- /dev/null
+++ b/av/filter/loudnorm_impl.h
@@ -0,0 +1,12 @@
+#ifndef AV_FILTER_LOUDNORM_H
+#define AV_FILTER_LOUDNORM_H
+
+#include <libavformat/avformat.h>
+
+char* loudnorm_get_stats(
+    AVFormatContext* fmt_ctx,
+    int audio_stream_index,
+    const char* loudnorm_args
+);
+
+#endif // AV_FILTER_LOUDNORM_H
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 935d233aa..685412b99 100644
--- a/setup.py
+++ b/setup.py
@@ -153,10 +153,36 @@ def parse_cflags(raw_flags):
         "library_dirs": [],
     }
 
+loudnorm_extension = Extension(
+    "av.filter.loudnorm",
+    sources=[
+        "av/filter/loudnorm.pyx",
+        "av/filter/loudnorm_impl.c",
+    ],
+    include_dirs=["av/filter"] + extension_extra["include_dirs"],
+    libraries=extension_extra["libraries"],
+    library_dirs=extension_extra["library_dirs"],
+)
+
+# Add the cythonized loudnorm extension to ext_modules
+ext_modules = cythonize(
+    loudnorm_extension,
+    compiler_directives={
+        "c_string_type": "str",
+        "c_string_encoding": "ascii",
+        "embedsignature": True,
+        "language_level": 3,
+    },
+    build_dir="src",
+    include_path=["include"],
+)
+
 # Construct the modules that we find in the "av" directory.
-ext_modules = []
 for dirname, dirnames, filenames in os.walk("av"):
     for filename in filenames:
+        if filename == "loudnorm.pyx":
+            continue
+
         # We are looking for Cython sources.
         if filename.startswith(".") or os.path.splitext(filename)[1] != ".pyx":
             continue
diff --git a/tests/test_streams.py b/tests/test_streams.py
index b7699e622..c7b234d48 100644
--- a/tests/test_streams.py
+++ b/tests/test_streams.py
@@ -26,6 +26,17 @@ def test_stream_tuples(self) -> None:
         audio_streams = tuple([s for s in container.streams if s.type == "audio"])
         assert audio_streams == container.streams.audio
 
+    def test_loudnorm(self) -> None:
+        container = av.open(
+            fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")
+        )
+        audio = container.streams.audio[0]
+        stats = av.filter.loudnorm.stats("i=-24.0:lra=7.0:tp=-2.0", audio)
+
+        assert isinstance(stats, bytes) and len(stats) > 30
+        assert b"inf" not in stats
+        assert b'"input_i"' in stats
+
     def test_selection(self) -> None:
         container = av.open(
             fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")