Skip to content

Commit

Permalink
Use re-packetized Opus audio
Browse files Browse the repository at this point in the history
This removes the ffmpeg dependency for outbound audio to HomeKit. The incoming Opus audio from Ring is simply repacketized into a format acceptable to HomeKit, with no additional transcoding.
  • Loading branch information
Tom Sightler committed Dec 23, 2024
1 parent c9443f8 commit 5c59b42
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 137 deletions.
164 changes: 52 additions & 112 deletions packages/homebridge-ring/camera-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import type { RingCamera } from 'ring-client-api'
import { hap } from './hap.ts'
import type { SrtpOptions } from '@homebridge/camera-utils'
import {
doesFfmpegSupportCodec,
generateSrtpOptions,
ReturnAudioTranscoder,
RtpSplitter,
Expand Down Expand Up @@ -137,86 +136,25 @@ class StreamingSessionWrapper {
)
}

private listenForAudioPackets(startStreamRequest: StartStreamRequest) {
async activate(request: StartStreamRequest) {
const {
targetAddress,
video: { port: videoPort },
audio: { port: audioPort },
} = this.prepareStreamRequest,
{
audio: {
codec: audioCodec,
sample_rate: audioSampleRate,
packet_time: audioPacketTime,
},
} = startStreamRequest,
// Repacketize the audio stream after it's been transcoded
audio: { sample_rate: audioSampleRate, packet_time: audioPacketTime },
} = request,
// use to encrypt Ring video to HomeKit
videoSrtpSession = new SrtpSession(getSessionConfig(this.videoSrtp)),
audioSrtpSession = new SrtpSession(getSessionConfig(this.audioSrtp)),
opusRepacketizer = new OpusRepacketizer(audioPacketTime / 20),
audioIntervalScale = ((audioSampleRate / 8) * audioPacketTime) / 20,
audioSrtpSession = new SrtpSession(getSessionConfig(this.audioSrtp))
audioIntervalScale = ((audioSampleRate / 8) * audioPacketTime) / 20

let firstTimestamp: number,
let sentVideo = false,
firstAudioTimestamp: number,
audioPacketCount = 0

this.repacketizeAudioSplitter.addMessageHandler(({ message }) => {
let rtp: RtpPacket | undefined = RtpPacket.deSerialize(message)

if (audioCodec === AudioStreamingCodecType.OPUS) {
// borrowed from scrypted
// Original source: https://github.com/koush/scrypted/blob/c13ba09889c3e0d9d3724cb7d49253c9d787fb97/plugins/homekit/src/types/camera/camera-streaming-srtp-sender.ts#L124-L143
rtp = opusRepacketizer.repacketize(rtp)

if (!rtp) {
return null
}

if (!firstTimestamp) {
firstTimestamp = rtp.header.timestamp
}

// from HAP spec:
// RTP Payload Format for Opus Speech and Audio Codec RFC 7587 with an exception
// that Opus audio RTP Timestamp shall be based on RFC 3550.
// RFC 3550 indicates that PCM audio based with a sample rate of 8k and a packet
// time of 20ms would have a monotonic interval of 8k / (1000 / 20) = 160.
// So 24k audio would have a monotonic interval of (24k / 8k) * 160 = 480.
// HAP spec also states that it may request packet times of 20, 30, 40, or 60.
// In practice, HAP has been seen to request 20 on LAN and 60 over LTE.
// So the RTP timestamp must scale accordingly.
// Further investigation indicates that HAP doesn't care about the actual sample rate at all,
// that's merely a suggestion. When encoding Opus, it can seemingly be an arbitrary sample rate,
// audio will work so long as the rtp timestamps are created properly: which is a construct of the sample rate
// HAP requests, and the packet time is respected,
// opus 48khz will work just fine.
rtp.header.timestamp =
(firstTimestamp + audioPacketCount * 160 * audioIntervalScale) %
0xffffffff
audioPacketCount++
}

// encrypt the packet
const encryptedPacket = audioSrtpSession.encrypt(rtp.payload, rtp.header)

// send the encrypted packet to HomeKit
this.audioSplitter
.send(encryptedPacket, {
port: audioPort,
address: targetAddress,
})
.catch(logError)

return null
})
}

async activate(request: StartStreamRequest) {
let sentVideo = false
const {
targetAddress,
video: { port: videoPort },
} = this.prepareStreamRequest,
// use to encrypt Ring video to HomeKit
videoSrtpSession = new SrtpSession(getSessionConfig(this.videoSrtp))

// Set up packet forwarding for video stream
this.streamingSession.addSubscriptions(
this.streamingSession.onVideoRtp.subscribe(({ header, payload }) => {
Expand All @@ -243,43 +181,48 @@ class StreamingSessionWrapper {
}),
)

const transcodingPromise = this.streamingSession.startTranscoding({
input: ['-vn'],
audio: [
'-map',
'0:a',

// OPUS specific - it works, but audio is very choppy
'-acodec',
'libopus',
'-frame_duration',
request.audio.packet_time,
'-application',
'lowdelay',

// Shared options
'-flags',
'+global_header',
'-ac',
`${request.audio.channel}`,
'-ar',
`${request.audio.sample_rate}k`,
'-b:a',
`${request.audio.max_bit_rate}k`,
'-bufsize',
`${request.audio.max_bit_rate * 4}k`,
'-payload_type',
request.audio.pt,
'-ssrc',
this.audioSsrc,
'-f',
'rtp',
`rtp://127.0.0.1:${await this.repacketizeAudioSplitter
.portPromise}?pkt_size=376`,
],
video: false,
output: [],
})
// Set up packet forwarding for audio stream
this.streamingSession.addSubscriptions(
this.streamingSession.onAudioRtp.subscribe((rtp) => {
if (!firstAudioTimestamp) {
firstAudioTimestamp = rtp.header.timestamp
}

// borrowed from scrypted
// Source reference: https://github.com/koush/scrypted/blob/main/plugins/homekit/src/types/camera/opus-repacketizer.ts
const packets = opusRepacketizer.repacketize(rtp)

if (!packets) {
return
}

for (rtp of packets) {
// RTP Payload Format for Opus Speech and Audio Codec RFC 7587 with an exception
// that Opus audio RTP Timestamp shall be based on RFC 3550.
rtp.header.timestamp =
(firstAudioTimestamp +
audioPacketCount * 160 * audioIntervalScale) %
0xffffffff
audioPacketCount++

rtp.header.padding = false
rtp.header.ssrc = this.audioSsrc
rtp.header.payloadType = request.audio.pt

const encryptedPacket = audioSrtpSession.encrypt(
rtp.payload,
rtp.header,
)

this.audioSplitter
.send(encryptedPacket, {
port: audioPort,
address: targetAddress,
})
.catch(logError)
}
}),
)

let cameraSpeakerActive = false
// used to send return audio from HomeKit to Ring
Expand Down Expand Up @@ -337,14 +280,11 @@ class StreamingSessionWrapper {
returnAudioTranscodedSplitter.close()
})

this.listenForAudioPackets(request)
await returnAudioTranscoder.start()
await transcodingPromise
}

stop() {
this.audioSplitter.close()
this.repacketizeAudioSplitter.close()
this.videoSplitter.close()
this.streamingSession.stop()
}
Expand Down
71 changes: 46 additions & 25 deletions packages/homebridge-ring/opus-repacketizer.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// OpusRepacketizer is borrowed from scrypted
// Original source: https://github.com/koush/scrypted/blob/3150a3033515a3886af1e6b35a0ba7432b63e02b/plugins/homekit/src/types/camera/opus-repacketizer.ts
// OpusRepacketizer is borrowed from Scrypted
// Source references: https://github.com/koush/scrypted/blob/main/plugins/homekit/src/types/camera/opus-repacketizer.ts

import type { RtpPacket } from 'werift'

Expand Down Expand Up @@ -64,17 +64,18 @@ import type { RtpPacket } from 'werift'

export class OpusRepacketizer {
depacketized: Buffer[] = []
extraPackets = 0

constructor(public framesPerPacket: number) {}

// repacketize a packet with a single frame into a packet with multiple frames.
repacketize(packet: RtpPacket): RtpPacket | undefined {
repacketize(packet: RtpPacket): RtpPacket[] | undefined {
const code = packet.payload[0] & 0b00000011
let offset: number

// see Frame Length Coding in RFC
const decodeFrameLength = () => {
let frameLength = packet.payload.readUInt8(offset)
let frameLength = packet.payload.readUInt8(offset++)
if (frameLength >= 252) {
offset++
frameLength += packet.payload.readUInt8(offset) * 4
Expand All @@ -87,11 +88,15 @@ export class OpusRepacketizer {
// code 3: cbr/vbr signaled, variable packets

if (code === 0) {
if (this.framesPerPacket === 1 && !this.depacketized.length) return packet
if (this.framesPerPacket === 1 && !this.depacketized.length) {
return [packet]
}
// depacketize by stripping off the config byte
this.depacketized.push(packet.payload.subarray(1))
} else if (code === 1) {
if (this.framesPerPacket === 2 && !this.depacketized.length) return packet
if (this.framesPerPacket === 2 && !this.depacketized.length) {
return [packet]
}
// depacketize by dividing the remaining payload into two equal sized frames
const remaining = packet.payload.length - 1
if (remaining % 2) {
Expand All @@ -101,7 +106,9 @@ export class OpusRepacketizer {
this.depacketized.push(packet.payload.subarray(1, 1 + frameLength))
this.depacketized.push(packet.payload.subarray(1 + frameLength))
} else if (code === 2) {
if (this.framesPerPacket === 2 && !this.depacketized.length) return packet
if (this.framesPerPacket === 2 && !this.depacketized.length) {
return [packet]
}
offset = 1
// depacketize by dividing the remaining payload into two inequal sized frames
const frameLength = decodeFrameLength()
Expand All @@ -119,7 +126,7 @@ export class OpusRepacketizer {
this.framesPerPacket === packetFrameCount &&
!this.depacketized.length
) {
return packet
return [packet]
}
const paddingIndicator = frameCountByte & 0b01000000
offset = 2
Expand All @@ -146,38 +153,52 @@ export class OpusRepacketizer {
}
} else {
const frameLengths: number[] = []
for (let i = 0; i < packetFrameCount; i++) {
for (let i = 0; i < packetFrameCount - 1; i++) {
const frameLength = decodeFrameLength()
frameLengths.push(frameLength)
}
for (let i = 0; i < packetFrameCount; i++) {
for (let i = 0; i < frameLengths.length; i++) {
const frameLength = frameLengths[i],
start = offset
offset += frameLength
this.depacketized.push(packet.payload.subarray(start, offset))
}
const lastFrameLength = packet.payload.length - padding - offset
this.depacketized.push(
packet.payload.subarray(offset, offset + lastFrameLength),
)
}
}

if (this.depacketized.length < this.framesPerPacket) return
if (this.depacketized.length < this.framesPerPacket) return []

const ret: RtpPacket[] = []

const depacketized = this.depacketized.slice(0, this.framesPerPacket)
this.depacketized = this.depacketized.slice(this.framesPerPacket)
// eslint-disable-next-line no-constant-condition
while (true) {
if (this.depacketized.length < this.framesPerPacket) return ret

// reuse the config and stereo indicator, but change the code to 3.
let toc = packet.payload[0]
toc |= 0b00000011
// vbr | padding indicator | packet count
const frameCountByte = 0b10000000 | this.framesPerPacket,
newHeader: number[] = [toc, frameCountByte]
const depacketized = this.depacketized.slice(0, this.framesPerPacket)
this.depacketized = this.depacketized.slice(this.framesPerPacket)

// M-1 length bytes
newHeader.push(...depacketized.slice(0, -1).map((data) => data.length))
// reuse the config and stereo indicator, but change the code to 3.
let toc = packet.payload[0]
toc |= 0b00000011
// vbr | padding indicator | packet count
const frameCountByte = 0b10000000 | this.framesPerPacket,
newHeader: number[] = [toc, frameCountByte]

const headerBuffer = Buffer.from(newHeader),
payload = Buffer.concat([headerBuffer, ...depacketized])
// M-1 length bytes
newHeader.push(...depacketized.slice(0, -1).map((data) => data.length))

packet.payload = payload
return packet
const headerBuffer = Buffer.from(newHeader),
payload = Buffer.concat([headerBuffer, ...depacketized]),
newPacket = packet.clone()
if (ret.length) this.extraPackets++
newPacket.header.sequenceNumber =
(packet.header.sequenceNumber + this.extraPackets + 0x10000) % 0x10000
newPacket.payload = payload
ret.push(newPacket)
}
}
}

0 comments on commit 5c59b42

Please sign in to comment.