Skip to content

Commit

Permalink
fix: handle video orientation from webrtc-extension (#452)
Browse files Browse the repository at this point in the history
  • Loading branch information
giangndm authored Nov 13, 2024
1 parent f61ecd2 commit 599e2ad
Show file tree
Hide file tree
Showing 12 changed files with 226 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ mod tests {
marker: true,
nackable: false,
layers: None,
meta: MediaMeta::Vp8 { key, sim: None },
meta: MediaMeta::Vp8 { key, sim: None, rotation: None },
data: vec![1, 2, 3],
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,13 @@ impl Selector {
if self.target == self.current {
return;
}
if let MediaMeta::H264 { key, profile: _, sim: Some(_sim) } = &mut pkt.meta {
if let MediaMeta::H264 {
key,
profile: _,
sim: Some(_sim),
rotation: _,
} = &mut pkt.meta
{
match (self.current, self.target) {
(Some(current), Some(target)) => {
match target.cmp(&current) {
Expand Down Expand Up @@ -106,7 +112,12 @@ impl Selector {
fn is_allow(&mut self, _ctx: &mut VideoSelectorCtx, pkt: &mut MediaPacket) -> Option<()> {
let current = self.current?;
match &mut pkt.meta {
MediaMeta::H264 { key: _, profile: _, sim: Some(sim) } => {
MediaMeta::H264 {
key: _,
profile: _,
sim: Some(sim),
rotation: _,
} => {
if current == sim.spatial {
Some(())
} else {
Expand Down Expand Up @@ -177,6 +188,7 @@ mod tests {
key,
profile: H264Profile::P42001fNonInterleaved,
sim: Some(H264Sim { spatial }),
rotation: None,
},
data: vec![1, 2, 3],
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl Selector {
if self.target == self.current {
return;
}
if let MediaMeta::Vp8 { key, sim: Some(sim) } = &mut pkt.meta {
if let MediaMeta::Vp8 { key, sim: Some(sim), rotation: _ } = &mut pkt.meta {
match (&mut self.current, &self.target) {
(Some(current), Some(target)) => {
match target.spatial.cmp(&current.spatial) {
Expand Down Expand Up @@ -164,7 +164,7 @@ impl Selector {
fn is_allow(&mut self, ctx: &mut VideoSelectorCtx, pkt: &mut MediaPacket) -> Option<()> {
let current = self.current.as_ref()?;
match &mut pkt.meta {
MediaMeta::Vp8 { key: _, sim: Some(sim) } => {
MediaMeta::Vp8 { key: _, sim: Some(sim), rotation: _ } => {
if sim.spatial == current.spatial && sim.temporal <= current.temporal {
log::trace!(
"[Vp8SimSelector] allow {} {}, seq {}, ts {}, tl0idx {:?} pic_id {:?}",
Expand Down Expand Up @@ -271,6 +271,7 @@ mod tests {
temporal,
layer_sync,
}),
rotation: None,
},
data: vec![1, 2, 3],
}
Expand Down Expand Up @@ -301,6 +302,7 @@ mod tests {
MediaMeta::Vp8 {
key: _,
sim: Some(Vp8Sim { picture_id, tl0_pic_idx, .. }),
rotation: _,
} => (picture_id.unwrap(), tl0_pic_idx.unwrap()),
_ => panic!("Should not happen"),
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,13 @@ impl Selector {
if self.target == self.current {
return;
}
if let MediaMeta::Vp9 { key, profile: _, svc: Some(svc) } = &mut pkt.meta {
if let MediaMeta::Vp9 {
key,
profile: _,
svc: Some(svc),
rotation: _,
} = &mut pkt.meta
{
match (&mut self.current, &self.target) {
(Some(current), Some(target)) => {
match target.spatial.cmp(&current.spatial) {
Expand Down Expand Up @@ -181,7 +187,12 @@ impl Selector {
fn is_allow(&mut self, ctx: &mut VideoSelectorCtx, pkt: &mut MediaPacket) -> Option<()> {
let current = self.current.as_ref()?;
match &mut pkt.meta {
MediaMeta::Vp9 { key: _, profile: _, svc: Some(svc) } => {
MediaMeta::Vp9 {
key: _,
profile: _,
svc: Some(svc),
rotation: _,
} => {
if svc.spatial <= current.spatial && svc.temporal <= current.temporal {
log::trace!(
"[Vp9SvcSelector] allow {} {}, seq {}, ts {}, marker {}, pic_id {:?}",
Expand Down Expand Up @@ -289,6 +300,7 @@ mod tests {
spatial_layers: None,
predicted_frame: false,
}),
rotation: None,
},
data: vec![1, 2, 3],
}
Expand Down
5 changes: 4 additions & 1 deletion packages/media_record/src/convert/codec/vpx_demuxer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,16 @@ impl VpxDemuxer {
let (mut depacketizer, is_key_frame) = match rtp.meta {
media_server_protocol::media::MediaMeta::Opus { .. } => panic!("wrong codec"),
media_server_protocol::media::MediaMeta::H264 { .. } => panic!("wrong codec"),
media_server_protocol::media::MediaMeta::Vp8 { key, sim } => {
media_server_protocol::media::MediaMeta::Vp8 { key, sim, rotation } => {
if let Some(sim) = sim {
if sim.spatial != 0 {
//TODO: how to get maximum quality
return None;
}
}
if let Some(_rotation) = rotation {
//TODO: process rotation
}
(Box::new(rtp::codecs::vp8::Vp8Packet::default()) as Box<dyn Depacketizer>, key)
}
media_server_protocol::media::MediaMeta::Vp9 { key, .. } => (Box::new(rtp::codecs::vp9::Vp9Packet::default()) as Box<dyn Depacketizer>, key),
Expand Down
15 changes: 15 additions & 0 deletions packages/protocol/proto/record/file_rec.proto
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,26 @@ message RecordChunk {
}

// A single media packet of a track: timing, codec, payload bytes and
// optional per-packet metadata (video orientation, audio level).
message TrackMedia {
// Rotation of the video carried by this packet.
// NOTE(review): UNKNOWN is the zero value and presumably means "no rotation
// information was provided" — confirm against the code that writes records.
enum Orientation {
UNKNOWN = 0;
DEG0 = 1;
DEG90 = 2;
DEG180 = 3;
DEG270 = 4;
}

// Audio level attached to this packet.
// NOTE(review): `present` presumably distinguishes "no level reported" from a
// reported level of 0 — confirm against the writer and reader.
message AudioLevel {
bool present = 1;
int32 level = 2;
}

uint32 media_ts = 1;
uint32 media_seq = 2;
bool marker = 3;
uint32 codec = 4;
bytes payload = 5;
// Video orientation for this packet; see `Orientation`.
Orientation orientation = 6;
// Audio level for this packet; see `AudioLevel`.
AudioLevel audio_level = 7;
}

uint32 track_id = 1;
Expand Down
46 changes: 42 additions & 4 deletions packages/protocol/src/media.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,14 @@ pub enum MediaScaling {
Simulcast,
}

/// Orientation of a video stream, expressed as one of four quarter-turn
/// rotations (0, 90, 180 or 270 degrees).
///
/// Carried as the optional `rotation` field of the video variants of
/// `MediaMeta`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MediaOrientation {
/// Rotated by 0 degrees.
Deg0,
/// Rotated by 90 degrees.
Deg90,
/// Rotated by 180 degrees.
Deg180,
/// Rotated by 270 degrees.
Deg270,
}

#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub enum MediaCodec {
Opus,
Expand All @@ -207,10 +215,26 @@ pub enum MediaCodec {

/// Codec-specific metadata attached to a media packet.
///
/// `Opus` is the audio variant; `H264`, `Vp8` and `Vp9` are the video
/// variants and each carries an optional `rotation`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum MediaMeta {
Opus { audio_level: Option<i8> },
H264 { key: bool, profile: H264Profile, sim: Option<H264Sim> },
Vp8 { key: bool, sim: Option<Vp8Sim> },
Vp9 { key: bool, profile: Vp9Profile, svc: Option<Vp9Svc> },
Opus {
audio_level: Option<i8>,
},
H264 {
key: bool,
profile: H264Profile,
sim: Option<H264Sim>,
// Video orientation for this packet, if any was provided.
rotation: Option<MediaOrientation>,
},
Vp8 {
key: bool,
sim: Option<Vp8Sim>,
// Video orientation for this packet, if any was provided.
rotation: Option<MediaOrientation>,
},
Vp9 {
key: bool,
profile: Vp9Profile,
svc: Option<Vp9Svc>,
// Video orientation for this packet, if any was provided.
rotation: Option<MediaOrientation>,
},
}

impl MediaMeta {
Expand All @@ -237,6 +261,20 @@ impl MediaMeta {
Self::Vp9 { profile, .. } => MediaCodec::Vp9(*profile),
}
}

/// Returns the video rotation stored in this metadata.
///
/// Yields `None` for the `Opus` variant, and for a video variant whose
/// `rotation` field is `None`.
pub fn rotation(&self) -> Option<MediaOrientation> {
    // `MediaMeta` is `Copy`, so the field can be bound by value.
    match *self {
        Self::Opus { .. } => None,
        Self::H264 { rotation, .. } => rotation,
        Self::Vp8 { rotation, .. } => rotation,
        Self::Vp9 { rotation, .. } => rotation,
    }
}

/// Returns the audio level stored in this metadata.
///
/// Only the `Opus` variant carries an audio level; every other variant
/// yields `None`.
pub fn audio_level(&self) -> Option<i8> {
    if let Self::Opus { audio_level, .. } = self {
        *audio_level
    } else {
        None
    }
}
}

#[derive(Derivative, Clone, PartialEq, Eq, Serialize, Deserialize)]
Expand Down
5 changes: 3 additions & 2 deletions packages/transport_webrtc/src/media/h264.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use media_server_protocol::media::{H264Profile, H264Sim, MediaMeta};
use media_server_protocol::media::{H264Profile, H264Sim, MediaMeta, MediaOrientation};

const H264_NALU_TTYPE_STAP_A: u32 = 24;
const H264_NALU_TTYPE_SPS: u32 = 7;
const H264_NALU_TYPE_BITMASK: u32 = 0x1F;

pub fn parse_rtp(payload: &[u8], profile: H264Profile, rid: Option<u8>) -> Option<MediaMeta> {
pub fn parse_rtp(payload: &[u8], profile: H264Profile, rid: Option<u8>, rotation: Option<MediaOrientation>) -> Option<MediaMeta> {
if payload.len() < 4 {
None
} else {
Expand All @@ -16,6 +16,7 @@ pub fn parse_rtp(payload: &[u8], profile: H264Profile, rid: Option<u8>) -> Optio
key,
profile,
sim: rid.map(|rid| H264Sim { spatial: rid }),
rotation,
})
}
}
Expand Down
121 changes: 116 additions & 5 deletions packages/transport_webrtc/src/media/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use media_server_protocol::media::{H264Profile, MediaCodec, MediaLayerBitrate, MediaLayersBitrate, MediaMeta, MediaPacket, Vp9Profile};
use media_server_protocol::media::{H264Profile, MediaCodec, MediaLayerBitrate, MediaLayersBitrate, MediaMeta, MediaOrientation, MediaPacket, Vp9Profile};
use str0m::{
format::{CodecConfig, CodecSpec},
media::{Mid, Pt, Rid},
rtp::{vla::VideoLayersAllocation, RtpPacket, Ssrc},
rtp::{vla::VideoLayersAllocation, ExtensionValues, RtpPacket, Ssrc, VideoOrientation},
};

mod bit_read;
Expand Down Expand Up @@ -60,17 +60,20 @@ impl RemoteMediaConvert {
),
MediaCodec::H264(profile) => {
let layers = rtp.header.ext_vals.user_values.get::<VideoLayersAllocation>().and_then(extract_simulcast);
let meta = h264::parse_rtp(&rtp.payload, profile, spatial)?;
let rotation = rtp.header.ext_vals.video_orientation.map(from_webrtc_orientation);
let meta = h264::parse_rtp(&rtp.payload, profile, spatial, rotation)?;
(true, layers, meta)
}
MediaCodec::Vp8 => {
let layers = rtp.header.ext_vals.user_values.get::<VideoLayersAllocation>().and_then(extract_simulcast);
let meta = vp8::parse_rtp(&rtp.payload, spatial)?;
let rotation = rtp.header.ext_vals.video_orientation.map(from_webrtc_orientation);
let meta = vp8::parse_rtp(&rtp.payload, spatial, rotation)?;
(true, layers, meta)
}
MediaCodec::Vp9(profile) => {
let layers = rtp.header.ext_vals.user_values.get::<VideoLayersAllocation>().and_then(extract_svc);
let meta = vp9::parse_rtp(&rtp.payload, profile)?;
let rotation = rtp.header.ext_vals.video_orientation.map(from_webrtc_orientation);
let meta = vp9::parse_rtp(&rtp.payload, profile, rotation)?;
(true, layers, meta)
}
};
Expand Down Expand Up @@ -210,3 +213,111 @@ fn rid_to_spatial(rid: &Rid) -> u8 {
_ => 0,
}
}

fn from_webrtc_orientation(orientation: VideoOrientation) -> MediaOrientation {
match orientation {
VideoOrientation::Deg0 => MediaOrientation::Deg0,
VideoOrientation::Deg90 => MediaOrientation::Deg90,
VideoOrientation::Deg180 => MediaOrientation::Deg180,
VideoOrientation::Deg270 => MediaOrientation::Deg270,
}
}

fn to_webrtc_orientation(orientation: MediaOrientation) -> VideoOrientation {
match orientation {
MediaOrientation::Deg0 => VideoOrientation::Deg0,
MediaOrientation::Deg90 => VideoOrientation::Deg90,
MediaOrientation::Deg180 => VideoOrientation::Deg180,
MediaOrientation::Deg270 => VideoOrientation::Deg270,
}
}

pub fn to_webrtc_extensions(pkt: &MediaPacket) -> ExtensionValues {
let mut ext = ExtensionValues::default();
if let Some(rotation) = pkt.meta.rotation() {
ext.video_orientation = Some(to_webrtc_orientation(rotation));
}
if let Some(audio_level) = pkt.meta.audio_level() {
ext.audio_level = Some(audio_level);
}
ext
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An Opus spec and an H264 spec (`profile-level-id=42e01f`,
    /// `packetization-mode=1`) convert to the expected `MediaCodec`.
    #[test]
    fn test_str0m_codec_convert() {
        let opus = CodecSpec {
            codec: str0m::format::Codec::Opus,
            clock_rate: str0m::media::Frequency::FORTY_EIGHT_KHZ,
            channels: None,
            format: str0m::format::FormatParams::default(),
        };
        assert_eq!(str0m_codec_convert(opus), Some(MediaCodec::Opus));

        let h264 = CodecSpec {
            codec: str0m::format::Codec::H264,
            clock_rate: str0m::media::Frequency::NINETY_KHZ,
            channels: None,
            format: str0m::format::FormatParams::parse_line("profile-level-id=42e01f;packetization-mode=1"),
        };
        assert_eq!(str0m_codec_convert(h264), Some(MediaCodec::H264(H264Profile::P42e01fNonInterleaved)));
    }

    /// Rids starting with '0', '1' or '2' map to that spatial index; any
    /// other leading byte falls back to 0.
    #[test]
    fn test_rid_to_spatial() {
        for (lead, expected) in [(b'0', 0), (b'1', 1), (b'2', 2), (b'3', 0)] {
            let rid = Rid::from_array([lead, b'0', b'0', b'0', b'0', b'0', b'0', b'0']);
            assert_eq!(rid_to_spatial(&rid), expected);
        }
    }

    /// Every str0m orientation converts to the same-named protocol variant.
    #[test]
    fn test_from_webrtc_orientation() {
        let cases = [
            (VideoOrientation::Deg0, MediaOrientation::Deg0),
            (VideoOrientation::Deg90, MediaOrientation::Deg90),
            (VideoOrientation::Deg180, MediaOrientation::Deg180),
            (VideoOrientation::Deg270, MediaOrientation::Deg270),
        ];
        for (webrtc, media) in cases {
            assert_eq!(from_webrtc_orientation(webrtc), media);
        }
    }

    /// Every protocol orientation converts to the same-named str0m variant.
    #[test]
    fn test_to_webrtc_orientation() {
        let cases = [
            (MediaOrientation::Deg0, VideoOrientation::Deg0),
            (MediaOrientation::Deg90, VideoOrientation::Deg90),
            (MediaOrientation::Deg180, VideoOrientation::Deg180),
            (MediaOrientation::Deg270, VideoOrientation::Deg270),
        ];
        for (media, webrtc) in cases {
            assert_eq!(to_webrtc_orientation(media), webrtc);
        }
    }

    /// Audio packets expose their audio level and video packets expose
    /// their rotation through the generated extension values.
    #[test]
    fn test_to_webrtc_extensions() {
        let audio = MediaPacket::build_audio(1, 1, Some(10), vec![1, 2, 3]);
        assert_eq!(to_webrtc_extensions(&audio).audio_level, Some(10));

        let video = MediaPacket {
            ts: 1,
            seq: 1,
            marker: true,
            nackable: false,
            layers: None,
            meta: MediaMeta::Vp8 {
                key: true,
                sim: None,
                rotation: Some(MediaOrientation::Deg90),
            },
            data: vec![1, 2, 3],
        };
        assert_eq!(to_webrtc_extensions(&video).video_orientation, Some(VideoOrientation::Deg90));
    }
}
Loading

0 comments on commit 599e2ad

Please sign in to comment.