diff --git a/README b/README index 58c940b..31ddcc3 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ FFmpeg version of headers required to interface with Nvidias codec APIs. -Corresponds to Video Codec SDK version 10.0.26. +Corresponds to Video Codec SDK version 11.0.10. Minimum required driver versions: -Linux: 445.87 or newer -Windows: 450.51 or newer +Linux: 455.28 or newer +Windows: 456.71 or newer diff --git a/ffnvcodec.pc.in b/ffnvcodec.pc.in index 9fd8b4f..b952df4 100644 --- a/ffnvcodec.pc.in +++ b/ffnvcodec.pc.in @@ -3,5 +3,5 @@ includedir=${prefix}/include Name: ffnvcodec Description: FFmpeg version of Nvidia Codec SDK headers -Version: 10.0.26.2 +Version: 11.0.10.0 Cflags: -I${includedir} diff --git a/include/ffnvcodec/dynlink_cuviddec.h b/include/ffnvcodec/dynlink_cuviddec.h index b223198..86a1bc3 100644 --- a/include/ffnvcodec/dynlink_cuviddec.h +++ b/include/ffnvcodec/dynlink_cuviddec.h @@ -28,7 +28,6 @@ /*****************************************************************************************************/ //! \file cuviddec.h //! NVDECODE API provides video decoding interface to NVIDIA GPU devices. -//! \date 2015-2019 //! This file contains constants, structure definitions and function prototypes used for decoding. 
/*****************************************************************************************************/ @@ -41,7 +40,7 @@ #endif #endif -#define NVDECAPI_MAJOR_VERSION 10 +#define NVDECAPI_MAJOR_VERSION 11 #define NVDECAPI_MINOR_VERSION 0 #define NVDECAPI_VERSION (NVDECAPI_MAJOR_VERSION | (NVDECAPI_MINOR_VERSION << 24)) @@ -76,6 +75,7 @@ typedef enum cudaVideoCodec_enum { cudaVideoCodec_HEVC, /**< HEVC */ cudaVideoCodec_VP8, /**< VP8 */ cudaVideoCodec_VP9, /**< VP9 */ + cudaVideoCodec_AV1, /**< AV1 */ cudaVideoCodec_NumCodecs, /**< Max codecs */ // Uncompressed YUV cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< Y,U,V (4:2:0) */ @@ -164,7 +164,7 @@ typedef struct _CUVIDDECODECAPS unsigned int reserved1[3]; /**< Reserved for future use - set to zero */ unsigned char bIsSupported; /**< OUT: 1 if codec supported, 0 if not supported */ - unsigned char reserved2; /**< Reserved for future use - set to zero */ + unsigned char nNumNVDECs; /**< OUT: Number of NVDECs that can support IN params */ unsigned short nOutputFormatMask; /**< OUT: each bit represents corresponding cudaVideoSurfaceFormat enum */ unsigned int nMaxWidth; /**< OUT: Max supported coded width in pixels */ unsigned int nMaxHeight; /**< OUT: Max supported coded height in pixels */ @@ -172,7 +172,12 @@ typedef struct _CUVIDDECODECAPS CodedWidth*CodedHeight/256 must be <= nMaxMBCount */ unsigned short nMinWidth; /**< OUT: Min supported coded width in pixels */ unsigned short nMinHeight; /**< OUT: Min supported coded height in pixels */ - unsigned int reserved3[11]; /**< Reserved for future use - set to zero */ + unsigned char bIsHistogramSupported; /**< OUT: 1 if Y component histogram output is supported, 0 if not + Note: histogram is computed on original picture data before + any post-processing like scaling, cropping, etc. 
is applied */ + unsigned char nCounterBitDepth; /**< OUT: histogram counter bit depth */ + unsigned short nMaxHistogramBins; /**< OUT: Max number of histogram bins */ + unsigned int reserved3[10]; /**< Reserved for future use - set to zero */ } CUVIDDECODECAPS; /**************************************************************************************************************/ @@ -223,7 +228,9 @@ typedef struct _CUVIDDECODECREATEINFO short right; short bottom; } target_rect; - tcu_ulong Reserved2[5]; /**< Reserved for future use - set to zero */ + + tcu_ulong enableHistogram; /**< IN: enable histogram output, if supported */ + tcu_ulong Reserved2[4]; /**< Reserved for future use - set to zero */ } CUVIDDECODECREATEINFO; /*********************************************************/ @@ -722,6 +729,181 @@ typedef struct _CUVIDVP9PICPARAMS } CUVIDVP9PICPARAMS; +/***********************************************************/ +//! \struct CUVIDAV1PICPARAMS +//! AV1 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/***********************************************************/ +typedef struct _CUVIDAV1PICPARAMS +{ + unsigned int width; // coded width, if superres enabled then it is upscaled width + unsigned int height; // coded height + unsigned int frame_offset; // defined as order_hint in AV1 specification + int decodePicIdx; // decoded output pic index, if film grain enabled, it will keep decoded (without film grain) output + // It can be used as reference frame for future frames + + // sequence header + unsigned int profile : 3; // 0 = profile0, 1 = profile1, 2 = profile2 + unsigned int use_128x128_superblock : 1; // superblock size 0:64x64, 1: 128x128 + unsigned int subsampling_x : 1; // (subsampling_x, _y) 1,1 = 420, 1,0 = 422, 0,0 = 444 + unsigned int subsampling_y : 1; + unsigned int mono_chrome : 1; // for monochrome content, mono_chrome = 1 and (subsampling_x, _y) should be 1,1 + unsigned int bit_depth_minus8 : 4; // bit depth minus 8 + unsigned 
int enable_filter_intra : 1; // tool enable in seq level, 0 : disable 1: frame header control + unsigned int enable_intra_edge_filter : 1; // intra edge filtering process, 0 : disable 1: enabled + unsigned int enable_interintra_compound : 1; // interintra, 0 : not present 1: present + unsigned int enable_masked_compound : 1; // 1: mode info for inter blocks may contain the syntax element compound_type. + // 0: syntax element compound_type will not be present + unsigned int enable_dual_filter : 1; // vertical and horiz filter selection, 1: enable and 0: disable + unsigned int enable_order_hint : 1; // order hint, and related tools, 1: enable and 0: disable + unsigned int order_hint_bits_minus1 : 3; // is used to compute OrderHintBits + unsigned int enable_jnt_comp : 1; // joint compound modes, 1: enable and 0: disable + unsigned int enable_superres : 1; // superres in seq level, 0 : disable 1: frame level control + unsigned int enable_cdef : 1; // cdef filtering in seq level, 0 : disable 1: frame level control + unsigned int enable_restoration : 1; // loop restoration filtering in seq level, 0 : disable 1: frame level control + unsigned int enable_fgs : 1; // defined as film_grain_params_present in AV1 specification + unsigned int reserved0_7bits : 7; // reserved bits; must be set to 0 + + // frame header + unsigned int frame_type : 2 ; // 0:Key frame, 1:Inter frame, 2:intra only, 3:s-frame + unsigned int show_frame : 1 ; // show_frame = 1 implies that frame should be immediately output once decoded + unsigned int disable_cdf_update : 1; // CDF update during symbol decoding, 1: disabled, 0: enabled + unsigned int allow_screen_content_tools : 1; // 1: intra blocks may use palette encoding, 0: palette encoding is never used + unsigned int force_integer_mv : 1; // 1: motion vectors will always be integers, 0: can contain fractional bits + unsigned int coded_denom : 3; // coded_denom of the superres scale as specified in AV1 specification + unsigned int allow_intrabc : 
1; // 1: intra block copy may be used, 0: intra block copy is not allowed + unsigned int allow_high_precision_mv : 1; // 1/8 precision mv enable + unsigned int interp_filter : 3; // interpolation filter. Refer to section 6.8.9 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned int switchable_motion_mode : 1; // defined as is_motion_mode_switchable in AV1 specification + unsigned int use_ref_frame_mvs : 1; // 1: current frame can use the previous frame mv information, 0: will not use. + unsigned int disable_frame_end_update_cdf : 1; // 1: indicates that the end of frame CDF update is disabled + unsigned int delta_q_present : 1; // quantizer index delta values are present in the block level + unsigned int delta_q_res : 2; // left shift which should be applied to decoded quantizer index delta values + unsigned int using_qmatrix : 1; // 1: quantizer matrix will be used to compute quantizers + unsigned int coded_lossless : 1; // 1: all segments use lossless coding + unsigned int use_superres : 1; // 1: superres enabled for frame + unsigned int tx_mode : 2; // 0: ONLY4x4,1:LARGEST,2:SELECT + unsigned int reference_mode : 1; // 0: SINGLE, 1: SELECT + unsigned int allow_warped_motion : 1; // 1: allow_warped_motion may be present, 0: allow_warped_motion will not be present + unsigned int reduced_tx_set : 1; // 1: frame is restricted to subset of the full set of transform types, 0: no such restriction + unsigned int skip_mode : 1; // 1: most of the mode info is skipped, 0: mode info is not skipped + unsigned int reserved1_3bits : 3; // reserved bits; must be set to 0 + + // tiling info + unsigned int num_tile_cols : 8; // number of tiles across the frame., max is 64 + unsigned int num_tile_rows : 8; // number of tiles down the frame., max is 64 + unsigned int context_update_tile_id : 16; // specifies which tile to use for the CDF update + unsigned short tile_widths[64]; // Width of each column in superblocks + unsigned short tile_heights[64]; // height of each 
row in superblocks + + // CDEF - refer to section 6.10.14 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned char cdef_damping_minus_3 : 2; // controls the amount of damping in the deringing filter + unsigned char cdef_bits : 2; // the number of bits needed to specify which CDEF filter to apply + unsigned char reserved2_4bits : 4; // reserved bits; must be set to 0 + unsigned char cdef_y_strength[8]; // 0-3 bits: y_pri_strength, 4-7 bits y_sec_strength + unsigned char cdef_uv_strength[8]; // 0-3 bits: uv_pri_strength, 4-7 bits uv_sec_strength + + // SkipModeFrames + unsigned char SkipModeFrame0 : 4; // specifies the frames to use for compound prediction when skip_mode is equal to 1. + unsigned char SkipModeFrame1 : 4; + + // qp information - refer to section 6.8.11 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned char base_qindex; // indicates the base frame qindex. Defined as base_q_idx in AV1 specification + char qp_y_dc_delta_q; // indicates the Y DC quantizer relative to base_q_idx. Defined as DeltaQYDc in AV1 specification + char qp_u_dc_delta_q; // indicates the U DC quantizer relative to base_q_idx. Defined as DeltaQUDc in AV1 specification + char qp_v_dc_delta_q; // indicates the V DC quantizer relative to base_q_idx. Defined as DeltaQVDc in AV1 specification + char qp_u_ac_delta_q; // indicates the U AC quantizer relative to base_q_idx. Defined as DeltaQUAc in AV1 specification + char qp_v_ac_delta_q; // indicates the V AC quantizer relative to base_q_idx. 
Defined as DeltaQVAc in AV1 specification + unsigned char qm_y; // specifies the level in the quantizer matrix that should be used for luma plane decoding + unsigned char qm_u; // specifies the level in the quantizer matrix that should be used for chroma U plane decoding + unsigned char qm_v; // specifies the level in the quantizer matrix that should be used for chroma V plane decoding + + // segmentation - refer to section 6.8.13 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned char segmentation_enabled : 1; // 1 indicates that this frame makes use of the segmentation tool + unsigned char segmentation_update_map : 1; // 1 indicates that the segmentation map are updated during the decoding of this frame + unsigned char segmentation_update_data : 1; // 1 indicates that new parameters are about to be specified for each segment + unsigned char segmentation_temporal_update : 1; // 1 indicates that the updates to the segmentation map are coded relative to the existing segmentation map + unsigned char reserved3_4bits : 4; // reserved bits; must be set to 0 + short segmentation_feature_data[8][8]; // specifies the feature data for a segment feature + unsigned char segmentation_feature_mask[8]; // indicates that the corresponding feature is unused or feature value is coded + + // loopfilter - refer to section 6.8.10 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned char loop_filter_level[2]; // contains loop filter strength values + unsigned char loop_filter_level_u; // loop filter strength value of U plane + unsigned char loop_filter_level_v; // loop filter strength value of V plane + unsigned char loop_filter_sharpness; // indicates the sharpness level + char loop_filter_ref_deltas[8]; // contains the adjustment needed for the filter level based on the chosen reference frame + char loop_filter_mode_deltas[2]; // contains the adjustment needed for the filter level based on the chosen mode + unsigned char loop_filter_delta_enabled : 1; // 
indicates that the filter level depends on the mode and reference frame used to predict a block + unsigned char loop_filter_delta_update : 1; // indicates that additional syntax elements are present that specify which mode and + // reference frame deltas are to be updated + unsigned char delta_lf_present : 1; // specifies whether loop filter delta values are present in the block level + unsigned char delta_lf_res : 2; // specifies the left shift to apply to the decoded loop filter values + unsigned char delta_lf_multi : 1; // separate loop filter deltas for Hy,Vy,U,V edges + unsigned char reserved4_2bits : 2; // reserved bits; must be set to 0 + + // restoration - refer to section 6.10.15 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned char lr_unit_size[3]; // specifies the size of loop restoration units: 0: 32, 1: 64, 2: 128, 3: 256 + unsigned char lr_type[3] ; // used to compute FrameRestorationType + + // reference frames + unsigned char primary_ref_frame; // specifies which reference frame contains the CDF values and other state that should be + // loaded at the start of the frame + unsigned char ref_frame_map[8]; // frames in dpb that can be used as reference for current or future frames + + unsigned char temporal_layer_id : 4; // temporal layer id + unsigned char spatial_layer_id : 4; // spatial layer id + + unsigned char reserved5_32bits[4]; // reserved bits; must be set to 0 + + // ref frame list + struct + { + unsigned int width; + unsigned int height; + unsigned char index; + unsigned char reserved24Bits[3]; // reserved bits; must be set to 0 + } ref_frame[7]; // frames used as reference frame for current frame. 
+ + // global motion + struct { + unsigned char invalid : 1; + unsigned char wmtype : 2; // defined as GmType in AV1 specification + unsigned char reserved5Bits : 5; // reserved bits; must be set to 0 + char reserved24Bits[3]; // reserved bits; must be set to 0 + int wmmat[6]; // defined as gm_params[] in AV1 specification + } global_motion[7]; // global motion params for reference frames + + // film grain params - refer to section 6.8.20 of the AV1 specification Version 1.0.0 with Errata 1 + unsigned short apply_grain : 1; + unsigned short overlap_flag : 1; + unsigned short scaling_shift_minus8 : 2; + unsigned short chroma_scaling_from_luma : 1; + unsigned short ar_coeff_lag : 2; + unsigned short ar_coeff_shift_minus6 : 2; + unsigned short grain_scale_shift : 2; + unsigned short clip_to_restricted_range : 1; + unsigned short reserved6_4bits : 4; // reserved bits; must be set to 0 + unsigned char num_y_points; + unsigned char scaling_points_y[14][2]; + unsigned char num_cb_points; + unsigned char scaling_points_cb[10][2]; + unsigned char num_cr_points; + unsigned char scaling_points_cr[10][2]; + unsigned char reserved7_8bits; // reserved bits; must be set to 0 + unsigned short random_seed; + short ar_coeffs_y[24]; + short ar_coeffs_cb[25]; + short ar_coeffs_cr[25]; + unsigned char cb_mult; + unsigned char cb_luma_mult; + short cb_offset; + unsigned char cr_mult; + unsigned char cr_luma_mult; + short cr_offset; + + int reserved[7]; // reserved bits; must be set to 0 +} CUVIDAV1PICPARAMS; /******************************************************************************************/ //! 
\struct CUVIDPICPARAMS @@ -756,6 +938,7 @@ typedef struct _CUVIDPICPARAMS CUVIDHEVCPICPARAMS hevc; CUVIDVP8PICPARAMS vp8; CUVIDVP9PICPARAMS vp9; + CUVIDAV1PICPARAMS av1; unsigned int CodecReserved[1024]; } CodecSpecific; } CUVIDPICPARAMS; @@ -768,22 +951,23 @@ typedef struct _CUVIDPICPARAMS /******************************************************/ typedef struct _CUVIDPROCPARAMS { - int progressive_frame; /**< IN: Input is progressive (deinterlace_mode will be ignored) */ - int second_field; /**< IN: Output the second field (ignored if deinterlace mode is Weave) */ - int top_field_first; /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */ - int unpaired_field; /**< IN: Input only contains one field (2nd field is invalid) */ + int progressive_frame; /**< IN: Input is progressive (deinterlace_mode will be ignored) */ + int second_field; /**< IN: Output the second field (ignored if deinterlace mode is Weave) */ + int top_field_first; /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */ + int unpaired_field; /**< IN: Input only contains one field (2nd field is invalid) */ // The fields below are used for raw YUV input - unsigned int reserved_flags; /**< Reserved for future use (set to zero) */ - unsigned int reserved_zero; /**< Reserved (set to zero) */ - unsigned long long raw_input_dptr; /**< IN: Input CUdeviceptr for raw YUV extensions */ - unsigned int raw_input_pitch; /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately) */ - unsigned int raw_input_format; /**< IN: Input YUV format (cudaVideoCodec_enum) */ - unsigned long long raw_output_dptr; /**< IN: Output CUdeviceptr for raw YUV extensions */ - unsigned int raw_output_pitch; /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately) */ - unsigned int Reserved1; /**< Reserved for future use (set to zero) */ - CUstream output_stream; /**< IN: stream object used by cuvidMapVideoFrame */ - unsigned int Reserved[46]; 
/**< Reserved for future use (set to zero) */ - void *Reserved2[2]; /**< Reserved for future use (set to zero) */ + unsigned int reserved_flags; /**< Reserved for future use (set to zero) */ + unsigned int reserved_zero; /**< Reserved (set to zero) */ + unsigned long long raw_input_dptr; /**< IN: Input CUdeviceptr for raw YUV extensions */ + unsigned int raw_input_pitch; /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately) */ + unsigned int raw_input_format; /**< IN: Input YUV format (cudaVideoCodec_enum) */ + unsigned long long raw_output_dptr; /**< IN: Output CUdeviceptr for raw YUV extensions */ + unsigned int raw_output_pitch; /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately) */ + unsigned int Reserved1; /**< Reserved for future use (set to zero) */ + CUstream output_stream; /**< IN: stream object used by cuvidMapVideoFrame */ + unsigned int Reserved[46]; /**< Reserved for future use (set to zero) */ + unsigned long long *histogram_dptr; /**< OUT: Output CUdeviceptr for histogram extensions */ + void *Reserved2[1]; /**< Reserved for future use (set to zero) */ } CUVIDPROCPARAMS; /*********************************************************************************************************/ diff --git a/include/ffnvcodec/dynlink_nvcuvid.h b/include/ffnvcodec/dynlink_nvcuvid.h index 1bc780d..7011a0d 100644 --- a/include/ffnvcodec/dynlink_nvcuvid.h +++ b/include/ffnvcodec/dynlink_nvcuvid.h @@ -28,7 +28,7 @@ /********************************************************************************************************************/ //! \file nvcuvid.h //! NVDECODE API provides video decoding interface to NVIDIA GPU devices. -//! \date 2015-2019 +//! \date 2015-2020 //! This file contains the interface constants, structure definitions and function prototypes. 
/********************************************************************************************************************/ @@ -148,6 +148,37 @@ typedef struct unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */ } CUVIDEOFORMAT; +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDOPERATINGPOINTINFO +//! Operating point information of scalable bitstream +/****************************************************************/ +typedef struct +{ + cudaVideoCodec codec; + union + { + struct + { + unsigned char operating_points_cnt; + unsigned char reserved24_bits[3]; + unsigned short operating_points_idc[32]; + } av1; + unsigned char CodecReserved[1024]; + }; +} CUVIDOPERATINGPOINTINFO; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDAV1SEQHDR +//! AV1 specific sequence header information +/****************************************************************/ +typedef struct { + unsigned int max_width; + unsigned int max_height; + unsigned char reserved[1016]; +} CUVIDAV1SEQHDR; + /****************************************************************/ //! \ingroup STRUCTS //! 
\struct CUVIDEOFORMATEX @@ -157,7 +188,10 @@ typedef struct typedef struct { CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */ - unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ + union { + CUVIDAV1SEQHDR av1; + unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ + }; } CUVIDEOFORMATEX; /****************************************************************/ @@ -221,7 +255,9 @@ typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); typedef struct _CUVIDSOURCEPARAMS { unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */ - unsigned int uReserved1[7]; /**< Reserved for future use - set to zero */ + unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */ + unsigned int uReserved : 31; /**< Reserved for future use - set to zero */ + unsigned int uReserved1[6]; /**< Reserved for future use - set to zero */ void *pUserData; /**< IN: User private data passed in to the data handlers */ PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */ PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */ @@ -323,14 +359,18 @@ typedef struct _CUVIDPARSERDISPINFO //! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS //! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by //! cuvidParseVideoData() to the application. +//! Parser picks default operating point as 0 and outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set or return value is +//! -1 or invalid operating point. //! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces //! while creating parser) //! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded //! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded +//! 
PFNVIDOPPOINTCALLBACK : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers, bit 11-30: reserved) /***********************************************************************************************************************/ typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); +typedef int (CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO*); /**************************************/ //! \ingroup STRUCTS @@ -346,12 +386,16 @@ typedef struct _CUVIDPARSERPARAMS IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */ unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display) 0=no delay (recommended values: 2..4) */ - unsigned int uReserved1[5]; /**< IN: Reserved for future use - set to 0 */ + unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */ + unsigned int uReserved : 31; /**< Reserved for future use - set to zero */ + unsigned int uReserved1[4]; /**< IN: Reserved for future use - set to 0 */ void *pUserData; /**< IN: User data for callbacks */ PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */ PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */ PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */ - void *pvReserved2[7]; /**< Reserved for future use - set to NULL */ + PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1 + scalable bitstream */ + void *pvReserved2[6]; /**< Reserved for future use - set to NULL */ CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */ } CUVIDPARSERPARAMS; @@ -388,5 
+432,3 @@ typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj); #endif /* __cplusplus */ #endif // __NVCUVID_H__ - - diff --git a/include/ffnvcodec/nvEncodeAPI.h b/include/ffnvcodec/nvEncodeAPI.h index afb80ec..4b0c9ff 100644 --- a/include/ffnvcodec/nvEncodeAPI.h +++ b/include/ffnvcodec/nvEncodeAPI.h @@ -115,7 +115,7 @@ typedef void* NV_ENC_OUTPUT_PTR; /**< NVENCODE API output buffer*/ typedef void* NV_ENC_REGISTERED_PTR; /**< A Resource that has been registered with NVENCODE API*/ typedef void* NV_ENC_CUSTREAM_PTR; /**< Pointer to CUstream*/ -#define NVENCAPI_MAJOR_VERSION 10 +#define NVENCAPI_MAJOR_VERSION 11 #define NVENCAPI_MINOR_VERSION 0 #define NVENCAPI_VERSION (NVENCAPI_MAJOR_VERSION | (NVENCAPI_MINOR_VERSION << 24)) @@ -178,10 +178,6 @@ static const GUID NV_ENC_H264_PROFILE_HIGH_444_GUID = static const GUID NV_ENC_H264_PROFILE_STEREO_GUID = { 0x40847bf5, 0x33f7, 0x4601, { 0x90, 0x84, 0xe8, 0xfe, 0x3c, 0x1d, 0xb8, 0xb7 } }; -// {CE788D20-AAA9-4318-92BB-AC7E858C8D36} -static const GUID NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY = -{ 0xce788d20, 0xaaa9, 0x4318, { 0x92, 0xbb, 0xac, 0x7e, 0x85, 0x8c, 0x8d, 0x36 } }; - // {B405AFAC-F32B-417B-89C4-9ABEED3E5978} static const GUID NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID = { 0xb405afac, 0xf32b, 0x417b, { 0x89, 0xc4, 0x9a, 0xbe, 0xed, 0x3e, 0x59, 0x78 } }; @@ -1116,6 +1112,18 @@ typedef enum _NV_ENC_CAPS */ NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES, + /** + * Indicates HW support for HEVC with alpha encoding. + * \n 0 : HEVC with alpha encoding not supported. + * \n 1 : HEVC with alpha encoding is supported. + */ + NV_ENC_CAPS_SUPPORT_ALPHA_LAYER_ENCODING, + + /** + * Indicates number of Encoding engines present on GPU. + */ + NV_ENC_CAPS_NUM_ENCODER_ENGINES, + /** * Reserved - Not to be used by clients. */ @@ -1288,8 +1296,10 @@ typedef struct _NV_ENC_QP NV_ENC_QP minQP; /**< [in]: Specifies the minimum QP used for rate control. Client must set NV_ENC_CONFIG::enableMinQP to 1. 
*/ NV_ENC_QP maxQP; /**< [in]: Specifies the maximum QP used for rate control. Client must set NV_ENC_CONFIG::enableMaxQP to 1. */ NV_ENC_QP initialRCQP; /**< [in]: Specifies the initial QP used for rate control. Client must set NV_ENC_CONFIG::enableInitialRCQP to 1. */ - uint32_t temporallayerIdxMask; /**< [in]: Specifies the temporal layers (as a bitmask) whose QPs have changed. Valid max bitmask is [2^NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS - 1] */ - uint8_t temporalLayerQP[8]; /**< [in]: Specifies the temporal layer QPs used for rate control. Temporal layer index is used as the array index */ + uint32_t temporallayerIdxMask; /**< [in]: Specifies the temporal layers (as a bitmask) whose QPs have changed. Valid max bitmask is [2^NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS - 1]. + Applicable only for constant QP mode (NV_ENC_RC_PARAMS::rateControlMode = NV_ENC_PARAMS_RC_CONSTQP). */ + uint8_t temporalLayerQP[8]; /**< [in]: Specifies the temporal layer QPs used for rate control. Temporal layer index is used as the array index. + Applicable only for constant QP mode (NV_ENC_RC_PARAMS::rateControlMode = NV_ENC_PARAMS_RC_CONSTQP). */ uint8_t targetQuality; /**< [in]: Target CQ (Constant Quality) level for VBR mode (range 0-51 with 0-automatic) */ uint8_t targetQualityLSB; /**< [in]: Fractional part of target quality (as 8.8 fixed point format) */ uint16_t lookaheadDepth; /**< [in]: Maximum depth of lookahead with range 0-(31 - number of B frames). @@ -1315,7 +1325,9 @@ typedef struct _NV_ENC_QP Other values are reserved for future use.*/ NV_ENC_MULTI_PASS multiPass; /**< [in]: This flag is used to enable multi-pass encoding for a given ::NV_ENC_PARAMS_RC_MODE. This flag is not valid for H264 and HEVC MEOnly mode */ - uint32_t reserved[6]; + uint32_t alphaLayerBitrateRatio; /**< [in]: Specifies the ratio in which bitrate should be split between base and alpha layer. A value 'x' for this field will split the target bitrate in a ratio of x : 1 between base and alpha layer. 
+ The default split ratio is 15.*/ + uint32_t reserved[5]; } NV_ENC_RC_PARAMS; /** macro for constructing the version field of ::_NV_ENC_RC_PARAMS */ @@ -1385,7 +1397,7 @@ typedef struct _NVENC_EXTERNAL_ME_HINT */ typedef struct _NV_ENC_CONFIG_H264 { - uint32_t reserved :1; /**< [in]: Reserved and must be set to 0 */ + uint32_t enableTemporalSVC :1; /**< [in]: Set to 1 to enable SVC temporal*/ uint32_t enableStereoMVC :1; /**< [in]: Set to 1 to enable stereo MVC*/ uint32_t hierarchicalPFrames :1; /**< [in]: Set to 1 to enable hierarchical P Frames */ uint32_t hierarchicalBFrames :1; /**< [in]: Set to 1 to enable hierarchical B Frames */ @@ -1423,7 +1435,11 @@ typedef struct _NV_ENC_CONFIG_H264 NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is also set is currently not supported and will make ::NvEncInitializeEncoder() return an error. */ - uint32_t reservedBitFields :14; /**< [in]: Reserved bitfields and must be set to 0 */ + uint32_t disableSVCPrefixNalu :1; /**< [in]: Set to 1 to disable writing of SVC Prefix NALU preceding each slice in bitstream. + Applicable only when temporal SVC is enabled (NV_ENC_CONFIG_H264::enableTemporalSVC = 1). */ + uint32_t enableScalabilityInfoSEI :1; /**< [in]: Set to 1 to enable writing of Scalability Information SEI message preceding each IDR picture in bitstream + Applicable only when temporal SVC is enabled (NV_ENC_CONFIG_H264::enableTemporalSVC = 1). */ + uint32_t reservedBitFields :12; /**< [in]: Reserved bitfields and must be set to 0 */ uint32_t level; /**< [in]: Specifies the encoding level. Client is recommended to set this to NV_ENC_LEVEL_AUTOSELECT in order to enable the NvEncodeAPI interface to select the correct level. */ uint32_t idrPeriod; /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. 
*/ uint32_t separateColourPlaneFlag; /**< [in]: Set to 1 to enable 4:4:4 separate colour planes */ @@ -1432,7 +1448,7 @@ typedef struct _NV_ENC_CONFIG_H264 which specifies whether the operation of the deblocking filter shall be disabled across some block edges of the slice and specifies for which edges the filtering is disabled. See section 7.4.3 of H.264 specification for more details.*/ - uint32_t numTemporalLayers; /**< [in]: Specifies max temporal layers to be used for hierarchical coding. Valid value range is [1,::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS] */ + uint32_t numTemporalLayers; /**< [in]: Specifies number of temporal layers to be used for hierarchical coding / temporal SVC. Valid value range is [1,::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS] */ uint32_t spsId; /**< [in]: Specifies the SPS id of the sequence header */ uint32_t ppsId; /**< [in]: Specifies the PPS id of the picture header */ NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE adaptiveTransformMode; /**< [in]: Specifies the AdaptiveTransform Mode. Check support for AdaptiveTransform mode using ::NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM caps. */ @@ -1466,7 +1482,9 @@ typedef struct _NV_ENC_CONFIG_H264 Set to 0 when using "LTR Per Picture" mode of LTR operation. */ uint32_t chromaFormatIDC; /**< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input. Check support for YUV444 encoding using ::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE caps.*/ - uint32_t maxTemporalLayers; /**< [in]: Specifies the max temporal layer used for hierarchical coding. */ + uint32_t maxTemporalLayers; /**< [in]: Specifies the maximum temporal layer used for temporal SVC / hierarchical coding. + Default value of this field is NV_ENC_CAPS::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS.
Note that the value NV_ENC_CONFIG_H264::maxNumRefFrames should + be greater than or equal to (NV_ENC_CONFIG_H264::maxTemporalLayers - 2) * 2, for NV_ENC_CONFIG_H264::maxTemporalLayers >= 2.*/ NV_ENC_BFRAME_REF_MODE useBFramesAsRef; /**< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps.*/ NV_ENC_NUM_REF_FRAMES numRefL0; /**< [in]: Specifies max number of reference frames in reference picture list L0, that can be used by hardware for prediction of a frame. Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */ @@ -1516,7 +1534,8 @@ typedef struct _NV_ENC_CONFIG_HEVC uint32_t enableConstrainedEncoding :1; /**< [in]: Set this to 1 to enable constrainedFrame encoding where each slice in the constrained picture is independent of other slices. Constrained encoding works only with rectangular slices. Check support for constrained encoding using ::NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING caps. */ - uint32_t reserved :16; /**< [in]: Reserved bitfields.*/ + uint32_t enableAlphaLayerEncoding :1; /**< [in]: Set this to 1 to enable HEVC encode with alpha layer. */ + uint32_t reserved :15; /**< [in]: Reserved bitfields.*/ uint32_t idrPeriod; /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG. Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */ uint32_t intraRefreshPeriod; /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set. Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH. 
*/ @@ -1916,8 +1935,10 @@ typedef struct _NV_ENC_PIC_PARAMS uint32_t reservedBitFields; /**< [in]: Reserved bitfields and must be set to 0 */ uint16_t meHintRefPicDist[2]; /**< [in]: Specifies temporal distance for reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITALIZE_PARAMS::enablePTD = 1 . meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints. If not set, will internally infer distance of 1. Ignored for NV_ENC_INITALIZE_PARAMS::enablePTD = 0 */ + NV_ENC_INPUT_PTR alphaBuffer; /**< [in]: Specifies the input alpha buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs. + Applicable only when encoding hevc with alpha layer is enabled. */ uint32_t reserved3[286]; /**< [in]: Reserved and must be set to 0 */ - void* reserved4[60]; /**< [in]: Reserved and must be set to NULL */ + void* reserved4[59]; /**< [in]: Reserved and must be set to NULL */ } NV_ENC_PIC_PARAMS; /** Macro for constructing the version field of ::_NV_ENC_PIC_PARAMS */ @@ -1978,7 +1999,8 @@ typedef struct _NV_ENC_LOCK_BITSTREAM uint32_t frameIdx; /**< [out]: Frame no. for which the bitstream is being retrieved. */ uint32_t hwEncodeStatus; /**< [out]: The NvEncodeAPI interface status for the locked picture. */ uint32_t numSlices; /**< [out]: Number of slices in the encoded picture. Will be reported only if NV_ENC_INITIALIZE_PARAMS::reportSliceOffsets set to 1. */ - uint32_t bitstreamSizeInBytes; /**< [out]: Actual number of bytes generated and copied to the memory pointed by bitstreamBufferPtr. */ + uint32_t bitstreamSizeInBytes; /**< [out]: Actual number of bytes generated and copied to the memory pointed by bitstreamBufferPtr. + When HEVC alpha layer encoding is enabled, this field reports the total encoded size in bytes i.e it is the encoded size of the base plus the alpha layer. 
*/ uint64_t outputTimeStamp; /**< [out]: Presentation timestamp associated with the encoded output. */ uint64_t outputDuration; /**< [out]: Presentation duration associates with the encoded output. */ void* bitstreamBufferPtr; /**< [out]: Pointer to the generated output bitstream. @@ -1995,7 +2017,9 @@ typedef struct _NV_ENC_LOCK_BITSTREAM uint32_t interMBCount; /**< [out]: For H264, Number of Inter MBs in the encoded frame, includes skip MBs. For HEVC, Number of Inter CTBs in the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */ int32_t averageMVX; /**< [out]: Average Motion Vector in X direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */ int32_t averageMVY; /**< [out]: Average Motion Vector in y direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */ - uint32_t reserved1[219]; /**< [in]: Reserved and must be set to 0 */ + uint32_t alphaLayerSizeInBytes; /**< [out]: Number of bytes generated for the alpha layer in the encoded output. Applicable only when HEVC with alpha encoding is enabled. */ + + uint32_t reserved1[218]; /**< [in]: Reserved and must be set to 0 */ void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ } NV_ENC_LOCK_BITSTREAM;