Skip to content

Commit

Permalink
Merge pull request #275 from Eyevinn/fix-wvtt-unified
Browse files Browse the repository at this point in the history
fix: make wvttlister work with Unified Streaming wvtt ismt  file
  • Loading branch information
tobbee authored Oct 3, 2023
2 parents 2527ed1 + 59a7725 commit cb27664
Show file tree
Hide file tree
Showing 10 changed files with 217 additions and 58 deletions.
18 changes: 17 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- Nothing yet
### Changed

- TfraEntry Time and MoofOffset types changed to unsigned
- TfraEntry attribute name SampleDelta corrected to SampleNumber

### Added

- MediaSegment and Fragment have new StartPos attribute
- mp4.File now has Mfra pointer
- MfraBox has new method FindEntry

### Fixed

- mp4ff-wvttlister works with Unified Streaming wvtt ismt file
- Fragment.GetFullSamples() allows tfdt to be absent
- Fragment.GetFullSamples() defaults the base data offset to the moof start position
- mp4ff-wvttlister works for Unified Streaming wvtt asset

## [0.38.1] - 2023-09-22

Expand Down
65 changes: 43 additions & 22 deletions cmd/mp4ff-wvttlister/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"flag"
"fmt"
"io"
"log"
"os"
"strings"
Expand All @@ -15,7 +16,7 @@ import (
var usg = `Usage of mp4ff-wvttlister:
mp4ff-wvttlister lists and displays content of wvtt (WebVTT in ISOBMFF) samples.
Use track with given non-zero track ID or first wvtt track found in an asset.
Uses track with given non-zero track ID or first wvtt track found in an asset.
`

var usage = func() {
Expand Down Expand Up @@ -49,26 +50,33 @@ func main() {
log.Fatalln(err)
}
defer ifd.Close()
parsedMp4, err := mp4.DecodeFile(ifd)

err = run(ifd, os.Stdout, *trackID, *maxNrSamples)
if err != nil {
log.Fatal(err)
}
}

// run decodes ifd as an MP4 file using the mp4.DecISMFlag decode flag and then lists the
// wvtt samples: parseProgressiveMp4 handles a file that is not fragmented and
// parseFragmentedMp4 handles a fragmented one. Any error is handed back to the caller.
// NOTE(review): this span is a rendered diff, so pre-change lines (calls without w,
// fmt.Printf + os.Exit, bare return) are interleaved with their replacements; only the
// replacement lines match this signature.
func run(ifd io.ReadSeeker, w io.Writer, trackID, maxNrSamples int) error {
parsedMp4, err := mp4.DecodeFile(ifd, mp4.WithDecodeFlags(mp4.DecISMFlag))
if err != nil {
return err
}

if !parsedMp4.IsFragmented() { // Progressive file
err = parseProgressiveMp4(parsedMp4, uint32(*trackID), *maxNrSamples)
err = parseProgressiveMp4(parsedMp4, w, uint32(trackID), maxNrSamples)
if err != nil {
fmt.Printf("Error: %s\n", err)
os.Exit(1)
return err
}
return
return nil
}

// Fragmented file
err = parseFragmentedMp4(parsedMp4, uint32(*trackID), *maxNrSamples)
err = parseFragmentedMp4(parsedMp4, w, uint32(trackID), maxNrSamples)
if err != nil {
fmt.Printf("Error: %s\n", err)
os.Exit(1)
return err
}
return nil
}

func findTrack(moov *mp4.MoovBox, hdlrType string, trackID uint32) (*mp4.TrakBox, error) {
Expand All @@ -84,21 +92,21 @@ func findTrack(moov *mp4.MoovBox, hdlrType string, trackID uint32) (*mp4.TrakBox
}
return inTrak, nil
}
return nil, fmt.Errorf("No matching track found")
return nil, fmt.Errorf("no matching track found")
}

func parseProgressiveMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {
func parseProgressiveMp4(f *mp4.File, w io.Writer, trackID uint32, maxNrSamples int) error {
wvttTrak, err := findTrack(f.Moov, "text", trackID)
if err != nil {
return err
}

stbl := wvttTrak.Mdia.Minf.Stbl
if stbl.Stsd.Wvtt == nil {
return fmt.Errorf("No wvtt track found")
return fmt.Errorf("no wvtt track found")
}

fmt.Printf("Track %d, timescale = %d\n", wvttTrak.Tkhd.TrackID, wvttTrak.Mdia.Mdhd.Timescale)
fmt.Fprintf(w, "Track %d, timescale = %d\n", wvttTrak.Tkhd.TrackID, wvttTrak.Mdia.Mdhd.Timescale)
err = stbl.Stsd.Wvtt.VttC.Info(os.Stdout, "", " ", " ")
if err != nil {
return err
Expand Down Expand Up @@ -129,7 +137,7 @@ func parseProgressiveMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {
// Next find sample bytes as slice in mdat
offsetInMdatData := uint64(offset) - mdatPayloadStart
sample := mdat.Data[offsetInMdatData : offsetInMdatData+uint64(size)]
err = printWvttSample(sample, sampleNr, decTime+uint64(cto), dur)
err = printWvttSample(w, sample, sampleNr, decTime+uint64(cto), dur)
if err != nil {
return err
}
Expand All @@ -140,7 +148,7 @@ func parseProgressiveMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {
return nil
}

func parseFragmentedMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {
func parseFragmentedMp4(f *mp4.File, w io.Writer, trackID uint32, maxNrSamples int) error {
var wvttTrex *mp4.TrexBox
if f.Init != nil { // Print vttC header and timescale if moov-box is present
wvttTrak, err := findTrack(f.Init.Moov, "text", trackID)
Expand All @@ -150,11 +158,11 @@ func parseFragmentedMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {

stbl := wvttTrak.Mdia.Minf.Stbl
if stbl.Stsd.Wvtt == nil {
return fmt.Errorf("No wvtt track found")
return fmt.Errorf("no wvtt track found")
}

fmt.Printf("Track %d, timescale = %d\n", wvttTrak.Tkhd.TrackID, wvttTrak.Mdia.Mdhd.Timescale)
err = stbl.Stsd.Wvtt.VttC.Info(os.Stdout, "", " ", " ")
fmt.Fprintf(w, "Track %d, timescale = %d\n", wvttTrak.Tkhd.TrackID, wvttTrak.Mdia.Mdhd.Timescale)
err = stbl.Stsd.Wvtt.VttC.Info(w, " ", "", " ")
if err != nil {
return err
}
Expand All @@ -167,16 +175,29 @@ func parseFragmentedMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {
iSamples := make([]mp4.FullSample, 0)
for _, iSeg := range f.Segments {
for _, iFrag := range iSeg.Fragments {
var tfraTime uint64
if f.Mfra != nil {
moofOffset := iFrag.Moof.StartPos
entry := f.Mfra.FindEntry(moofOffset, iFrag.Moof.Traf.Tfhd.TrackID)
if entry != nil {
tfraTime = entry.Time
}
}
fSamples, err := iFrag.GetFullSamples(wvttTrex)
if err != nil {
return err
}
if tfraTime != 0 && fSamples[0].DecodeTime == 0 {
for i := range fSamples {
fSamples[i].DecodeTime += tfraTime
}
}
iSamples = append(iSamples, fSamples...)
}
}
var err error
for i, sample := range iSamples {
err = printWvttSample(sample.Data, i+1, sample.PresentationTime(), sample.Dur)
err = printWvttSample(w, sample.Data, i+1, sample.PresentationTime(), sample.Dur)

if err != nil {
return err
Expand All @@ -188,12 +209,12 @@ func parseFragmentedMp4(f *mp4.File, trackID uint32, maxNrSamples int) error {
return nil
}

// printWvttSample prints one header line with the sample number, pts and duration, then
// decodes the sample bytes as a single box with mp4.DecodeBox and prints that box via Info.
// A decode error is returned before anything is printed for the box.
// NOTE(review): rendered diff - the signature without w and the fmt.Printf/os.Stdout lines
// are the pre-change side; the lines using w are the post-change side.
func printWvttSample(sample []byte, nr int, pts uint64, dur uint32) error {
fmt.Printf("Sample %d, pts=%d, dur=%d\n", nr, pts, dur)
func printWvttSample(w io.Writer, sample []byte, nr int, pts uint64, dur uint32) error {
fmt.Fprintf(w, "Sample %d, pts=%d, dur=%d\n", nr, pts, dur)
buf := bytes.NewBuffer(sample)
box, err := mp4.DecodeBox(0, buf)
if err != nil {
return err
}
return box.Info(os.Stdout, "", " ", " ")
return box.Info(w, " ", "", " ")
}
81 changes: 81 additions & 0 deletions cmd/mp4ff-wvttlister/main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package main

import (
"bytes"
"os"
"strings"
"testing"
)

// wanted is the expected text output of run for testdata/sample_short.ismt.
// TestWvttLister splits it on newlines and compares it line by line with the actual output.
var wanted = `Track 1, timescale = 1000
[vttC] size=14
- config: "WEBVTT"
Sample 1, pts=0, dur=6640
[vttc] size=52
[sttg] size=18
- settings: align:left
[payl] size=26
- cueText: "<c.magenta>...</c>"
Sample 2, pts=6640, dur=320
[vtte] size=8
Sample 3, pts=6960, dur=3040
[vttc] size=129
[sttg] size=20
- settings: align:center
[payl] size=89
- cueText: "<c.magenta>-Tout, tout, tout pourri,</c>\n<c.magenta>tout, tout, tout plaplat,</c>"
[vsid] size=12
- sourceID: 4068696550
Sample 4, pts=10000, dur=880
[vttc] size=129
[sttg] size=20
- settings: align:center
[payl] size=89
- cueText: "<c.magenta>-Tout, tout, tout pourri,</c>\n<c.magenta>tout, tout, tout plaplat,</c>"
[vsid] size=12
- sourceID: 4068696550
Sample 5, pts=10880, dur=320
[vtte] size=8
Sample 6, pts=11200, dur=3160
[vttc] size=127
[sttg] size=20
- settings: align:center
[payl] size=99
- cueText: "<c.magenta>Chien Pourri et Chaplapla,</c>\n<c.magenta>c'est moi, le chien, toi, le chat.</c>"
Sample 7, pts=14360, dur=320
[vtte] size=8
Sample 8, pts=14680, dur=5320
[vttc] size=131
[sttg] size=20
- settings: align:center
[payl] size=91
- cueText: "<c.magenta>Un ami, une poubelle,</c>\n<c.magenta>et pour nous, la vie est belle.</c>"
[vsid] size=12
- sourceID: 1833399447
`

// TestWvttLister runs the lister on testdata/sample_short.ismt and checks that the text
// written by run equals the wanted output, line by line.
func TestWvttLister(t *testing.T) {
	testFile := "testdata/sample_short.ismt"

	ifh, err := os.Open(testFile)
	if err != nil {
		// Nothing below is meaningful without the input file, so stop here.
		t.Fatal(err)
	}
	defer ifh.Close()

	var w bytes.Buffer
	if err = run(ifh, &w, 0, -1); err != nil {
		// Partial output after a failed run would only produce noisy line diffs.
		t.Fatal(err)
	}

	gotLines := strings.Split(w.String(), "\n")
	wantedLines := strings.Split(wanted, "\n")
	if len(gotLines) != len(wantedLines) {
		t.Errorf("got %d lines, wanted %d", len(gotLines), len(wantedLines))
	}

	// Compare only the common prefix: indexing wantedLines with every index of gotLines
	// would panic with an out-of-range index when the output has extra lines.
	nrLines := len(gotLines)
	if len(wantedLines) < nrLines {
		nrLines = len(wantedLines)
	}
	for i := 0; i < nrLines; i++ {
		if gotLines[i] != wantedLines[i] {
			t.Errorf("line %d: got: %q\n wanted %q", i, gotLines[i], wantedLines[i])
		}
	}
}
Binary file added cmd/mp4ff-wvttlister/testdata/sample_short.ismt
Binary file not shown.
27 changes: 21 additions & 6 deletions mp4/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ type File struct {
Init *InitSegment // Init data (ftyp + moov for fragmented file)
Sidx *SidxBox // The first sidx box for a DASH OnDemand file
Sidxs []*SidxBox // All sidx boxes for a DASH OnDemand file
tfra *TfraBox // Single tfra box read at end for segmentation of ISM files
tfra *TfraBox // Single tfra box read first if DecISMFlag set
Mfra *MfraBox // MfraBox for ISM files
Segments []*MediaSegment // Media segments
Children []Box // All top-level boxes in order
FragEncMode EncFragFileMode // Determine how fragmented files are encoded
Expand Down Expand Up @@ -261,14 +262,14 @@ func (f *File) AddChild(child Box, boxStartPos uint64) {
case *StypBox:
// Starts a new segment
f.isFragmented = true
f.AddMediaSegment(NewMediaSegmentWithStyp(box))
f.AddMediaSegment(&MediaSegment{Styp: box, StartPos: boxStartPos})
case *EmsgBox:
// emsg box is only added at the start of a fragment (inside a segment).
// The case that a segment starts without an emsg is also handled.
f.startSegmentIfNeeded(box, boxStartPos)
lastSeg := f.LastSegment()
if len(lastSeg.Fragments) == 0 {
lastSeg.AddFragment(NewFragment())
lastSeg.AddFragment(&Fragment{StartPos: boxStartPos})
}
frag := lastSeg.LastFragment()
frag.AddChild(box)
Expand All @@ -280,7 +281,7 @@ func (f *File) AddChild(child Box, boxStartPos uint64) {
currSeg := f.LastSegment()
lastFrag := currSeg.LastFragment()
if lastFrag == nil || lastFrag.Moof != nil {
currSeg.AddFragment(NewFragment())
currSeg.AddFragment(&Fragment{StartPos: boxStartPos})
}
frag := currSeg.LastFragment()
frag.AddChild(moof)
Expand All @@ -291,11 +292,13 @@ func (f *File) AddChild(child Box, boxStartPos uint64) {
currentFragment := f.LastSegment().LastFragment()
currentFragment.AddChild(box)
}
case *MfraBox:
f.Mfra = box
}
f.Children = append(f.Children, child)
}

// startSegmentIfNeeded starts a new segment if there is none or if position match with sidx.
// startSegmentIfNeeded starts a new segment if there is none or if the position matches sidx or tfra.
func (f *File) startSegmentIfNeeded(b Box, boxStartPos uint64) {
segStart := false
idx := len(f.Segments)
Expand All @@ -320,7 +323,13 @@ func (f *File) startSegmentIfNeeded(b Box, boxStartPos uint64) {
}
if segStart {
f.isFragmented = true
f.AddMediaSegment(NewMediaSegmentWithoutStyp())
ms := MediaSegment{
Styp: nil,
Fragments: nil,
EncOptimize: OptimizeNone,
StartPos: boxStartPos,
}
f.AddMediaSegment(&ms)
return
}
}
Expand Down Expand Up @@ -444,6 +453,12 @@ func (f *File) Encode(w io.Writer) error {
return err
}
}
if f.Mfra != nil {
err := f.Mfra.Encode(w)
if err != nil {
return err
}
}
case EncModeBoxTree:
for _, b := range f.Children {
err := b.Encode(w)
Expand Down
9 changes: 7 additions & 2 deletions mp4/fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type Fragment struct {
Children []Box // All top-level boxes in order
nextTrunNr uint32 // To handle multi-trun cases
EncOptimize EncOptimize // Bit field with optimizations being done at encoding
StartPos uint64 // Start position in file added by parser
}

// NewFragment creates an empty MP4 Fragment.
Expand Down Expand Up @@ -127,12 +128,16 @@ func (f *Fragment) GetFullSamples(trex *TrexBox) ([]FullSample, error) {
traf = moof.Traf // The first one
}
tfhd := traf.Tfhd
baseTime := traf.Tfdt.BaseMediaDecodeTime()
var baseTime uint64
if traf.Tfdt != nil {
baseTime = traf.Tfdt.BaseMediaDecodeTime()
}
moofStartPos := moof.StartPos
var samples []FullSample
for _, trun := range traf.Truns {
totalDur := trun.AddSampleDefaultValues(tfhd, trex)
var baseOffset uint64
// The default is moofStartPos according to Section 8.8.7.1
baseOffset := moofStartPos
if tfhd.HasBaseDataOffset() {
baseOffset = tfhd.BaseDataOffset
} else if tfhd.DefaultBaseIfMoof() {
Expand Down
1 change: 1 addition & 0 deletions mp4/mediasegment.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type MediaSegment struct {
Sidxs []*SidxBox // All sidx boxes in a segment
Fragments []*Fragment
EncOptimize EncOptimize
StartPos uint64 // Start position in file
}

// NewMediaSegment - create empty MediaSegment with CMAF styp box
Expand Down
10 changes: 10 additions & 0 deletions mp4/mfra.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,13 @@ func (m *MfraBox) GetChildren() []Box {
func (m *MfraBox) Info(w io.Writer, specificBoxLevels, indent, indentStep string) error {
// Delegate entirely to ContainerInfo, passing the box itself and all formatting
// arguments through unchanged.
return ContainerInfo(m, w, specificBoxLevels, indent, indentStep)
}

// FindEntry - look up the tfra entry for a moof start offset within a given track.
// The first tfra box whose TrackID equals trackID is asked via its own FindEntry,
// and that result is returned as is. Returns nil if no tfra box matches trackID.
func (m *MfraBox) FindEntry(moofStart uint64, trackID uint32) *TfraEntry {
	for i := range m.Tfras {
		box := m.Tfras[i]
		if box.TrackID != trackID {
			continue
		}
		return box.FindEntry(moofStart)
	}
	return nil
}
Loading

0 comments on commit cb27664

Please sign in to comment.