var (
	// ErrInvalidOpusPacket is returned when a buffer cannot be interpreted as
	// a well-formed Opus packet.
	ErrInvalidOpusPacket = errors.New("invalid opus packet")
)

// ParseOpusPacketDuration returns the duration of an Opus packet, computed
// from the packet's TOC byte and, for code 3 packets, the frame count byte
// that follows it.
//
// Only the first one or two bytes of data are inspected; the rest of the
// payload is not validated. The result keeps sub-millisecond precision, so a
// 2.5 ms frame yields 2.5 ms rather than being truncated to 2 ms.
//
// ErrInvalidOpusPacket is returned when data is empty, when a code 3 packet
// lacks its frame count byte or declares zero frames, or when the total
// duration exceeds 120 ms.
//
// https://www.rfc-editor.org/rfc/rfc6716#section-3.1
func ParseOpusPacketDuration(data []byte) (time.Duration, error) {
	const (
		// Frame sizes in the table below are expressed in samples at 48 kHz.
		sampleRate = 48000
		// Upper bound on the audio carried by a single packet (120 ms).
		maxPacketSamples = 5760
	)

	// Samples per frame, indexed by the 5-bit config field (top bits of TOC).
	frameSamples := [32]int64{
		480, 960, 1920, 2880, // Silk-Only
		480, 960, 1920, 2880, // Silk-Only
		480, 960, 1920, 2880, // Silk-Only
		480, 960, // Hybrid
		480, 960, // Hybrid
		120, 240, 480, 960, // Celt-Only
		120, 240, 480, 960, // Celt-Only
		120, 240, 480, 960, // Celt-Only
		120, 240, 480, 960, // Celt-Only
	}

	if len(data) < 1 {
		return 0, ErrInvalidOpusPacket
	}

	toc := data[0]

	// The two low bits of the TOC byte select the frame count code.
	var nframes int64
	switch toc & 3 {
	case 0:
		nframes = 1
	case 1, 2:
		nframes = 2
	case 3:
		if len(data) < 2 {
			return 0, ErrInvalidOpusPacket
		}
		// Low 6 bits carry the frame count; the two high bits are flags.
		nframes = int64(data[1] & 63)
		if nframes == 0 {
			// A packet must contain at least one frame.
			return 0, ErrInvalidOpusPacket
		}
	}

	samples := nframes * frameSamples[toc>>3]
	if samples > maxPacketSamples {
		return 0, ErrInvalidOpusPacket
	}

	// Convert in nanoseconds so 2.5 ms and 5 ms multiples are not truncated.
	return time.Duration(samples) * time.Second / sampleRate, nil
}