ref: 8110553efd2b7b7d92284fbeb1886233fcffc07e
parent: 1cd6fa1aa2cd7eb7fc2beebec6ae51eb25450971
author: Sigrid Haflínudóttir <ftrvxmtrx@gmail.com>
date: Tue Mar 17 19:41:02 EDT 2020
AAC stream extraction working for tracks with defined stsd/stco/stsz
--- a/iso.c
+++ b/iso.c
@@ -2,13 +2,28 @@
#include <libc.h>
#include <bio.h>
+typedef struct Audio Audio;
typedef struct Box Box;
typedef struct RunSample RunSample;
-typedef struct Track Track;
typedef struct SampleToChunk SampleToChunk;
+typedef struct Track Track;
+typedef struct Video Video;
+/* Audio sample description filled in from an stsd entry (see sampleentry). */
+struct Audio {
+ u32int format; /* entry format code, e.g. FmtMp4a; 0 is treated as "no audio entry" */
+ int channels; /* channel count read from the sample entry */
+ int samplerate; /* compared against the srate2mpeg4fi entries when dumping */
+};
+
+/* Video sample description filled in from an stsd entry (see sampleentry). */
+struct Video {
+ u32int format; /* entry format code; 0 is treated as "no video entry" */
+ int width; /* 16-bit width field of the sample entry */
+ int height; /* 16-bit height field of the sample entry */
+};
+
struct Box {
vlong dsz;
+ vlong offset;
vlong dstart;
char extended[16];
u32int type;
@@ -73,9 +88,9 @@
}trun;
struct {
- u32int handlertype;
u32int entrycount;
- /* FIXME entries? */
+ Video video;
+ Audio audio;
}stsd;
struct {
@@ -99,9 +114,9 @@
}stco_co64;
struct {
- u32int samplesize;
+ u32int samplesizeeach;
u32int samplecount;
- u32int *entrysize;
+ u32int *samplesize;
}stsz; /* FIXME need stz2 as well */
struct {
@@ -127,10 +142,19 @@
vlong timeoffset;
};
+/* One stsc entry: a run of chunks that all hold the same number of samples. */
+struct SampleToChunk {
+ u32int firstchunk; /* 1-based index of the first chunk of the run (dumptrack subtracts 1) */
+ u32int samplesperchunk; /* samples stored in each chunk of the run */
+ u32int sdt; /* printed as sample_description_table by printbox */
+};
+
struct Track {
+ u32int handlertype;
+
u64int *chunkoffset;
u32int numchunks;
+ u32int samplesizeeach;
u32int *samplesize;
u32int numsamples;
@@ -137,16 +161,18 @@
SampleToChunk *stc;
u32int numstc;
+ Audio audio;
+ Video video;
+
int id;
};
-struct SampleToChunk {
- u32int firstchunk;
- u32int samplesperchunk;
- u32int sdt;
-};
-
enum {
+ HandlerVideo = 0x76696465u,
+ HandlerAudio = 0x736f756eu,
+
+ FmtMp4a = 0x6d703461u,
+
BoxUuid = 0x75756964u,
BoxFtyp = 0x66747970u,
BoxMoov = 0x6d6f6f76u,
@@ -182,7 +208,7 @@
BoxMdat = 0x6d646174u,
};
-#define bu16(x) ((x)[0]<<8 | (x)[1]<<16)
+#define bu16(x) ((x)[0]<<8 | (x)[1])
#define bu32(x) ((x)[0]<<24 | (x)[1]<<16 | (x)[2]<<8 | (x)[3])
#define bu64(x) ((u64int)(x)[0]<<56 | (u64int)(x)[1]<<48 | (u64int)(x)[2]<<40 | (u64int)(x)[3]<<32 | (x)[4]<<24 | (x)[5]<<16 | (x)[6]<<8 | (x)[7])
@@ -198,10 +224,11 @@
static int dflag;
static int dind;
-static int trackdump = -1;
static u32int defsamplesize;
static Track track;
+static Track *tracks;
+static int ntracks;
static int parsebox(Biobuf *f, Box *b, int *eof);
@@ -239,6 +266,11 @@
fprint(2, "\t%.*sversion\t%d\n", dind, ind, b->version);
fprint(2, "\t%.*sflags\t0x%ux\n", dind, ind, b->flags);
}
+ if(dflag > 1){
+ fprint(2, "\t%.*soffset\t%zd\n", dind, ind, b->offset);
+ fprint(2, "\t%.*sdstart\t%zd\n", dind, ind, b->dstart);
+ fprint(2, "\t%.*sdsize\t%zd\n", dind, ind, b->dsz);
+ }
if(b->type == BoxFtyp){
fprint(2, "\t%.*sbrand\t%T\n", dind, ind, b->ftyp.brand);
@@ -268,11 +300,11 @@
);
}else if(b->type == BoxTrex){
fprint(2, "\t%.*strackid\t0x%ux\n", dind, ind, b->trex.trackid);
- fprint(2, "\t%.*sdefsample.\n", dind, ind);
- fprint(2, "\t\t%.*s.descrindex\t0x%ux\n", dind, ind, b->trex.defsample.descrindex);
- fprint(2, "\t\t%.*s.duration\t%ud\n", dind, ind, b->trex.defsample.duration);
- fprint(2, "\t\t%.*s.size\t0x%ux\n", dind, ind, b->trex.defsample.size);
- fprint(2, "\t\t%.*s.flags\t0x%ux\n", dind, ind, b->trex.defsample.flags);
+ fprint(2, "\t%.*sdefsample\n", dind, ind);
+ fprint(2, "\t\t%.*sdescrindex\t0x%ux\n", dind, ind, b->trex.defsample.descrindex);
+ fprint(2, "\t\t%.*sduration\t%ud\n", dind, ind, b->trex.defsample.duration);
+ fprint(2, "\t\t%.*ssize\t0x%ux\n", dind, ind, b->trex.defsample.size);
+ fprint(2, "\t\t%.*sflags\t0x%ux\n", dind, ind, b->trex.defsample.flags);
}else if(b->type == BoxMfhd){
fprint(2, "\t%.*sseqnumber\t%ud\n", dind, ind, b->mfhd.seqnumber);
}else if(b->type == BoxTfhd){
@@ -279,15 +311,15 @@
fprint(2, "\t%.*strackid\t0x%ux\n", dind, ind, b->tfhd.trackid);
if(b->flags & 1)
fprint(2, "\t%.*sbaseoffset\t%zd\n", dind, ind, b->tfhd.baseoffset);
- fprint(2, "\t%.*sdefsample.\n", dind, ind);
+ fprint(2, "\t%.*sdefsample\n", dind, ind);
if(b->flags & 2)
- fprint(2, "\t\t%.*s.descrindex\t0x%ux\n", dind, ind, b->tfhd.defsample.descrindex);
+ fprint(2, "\t\t%.*sdescrindex\t0x%ux\n", dind, ind, b->tfhd.defsample.descrindex);
if(b->flags & 8)
- fprint(2, "\t\t%.*s.duration\t%ud\n", dind, ind, b->tfhd.defsample.duration);
+ fprint(2, "\t\t%.*sduration\t%ud\n", dind, ind, b->tfhd.defsample.duration);
if(b->flags & 16)
- fprint(2, "\t\t%.*s.size\t0x%ux\n", dind, ind, b->tfhd.defsample.size);
+ fprint(2, "\t\t%.*ssize\t0x%ux\n", dind, ind, b->tfhd.defsample.size);
if(b->flags & 32)
- fprint(2, "\t\t%.*s.flags\t0x%ux\n", dind, ind, b->tfhd.defsample.flags);
+ fprint(2, "\t\t%.*sflags\t0x%ux\n", dind, ind, b->tfhd.defsample.flags);
if(b->flags & 0x10000)
fprint(2, "\t%.*sduration is empty\n", dind, ind);
if(b->flags & 0x20000)
@@ -300,20 +332,29 @@
fprint(2, "\t%.*sdataoffset\t%d\n", dind, ind, b->trun.dataoffset);
if(b->flags & 2)
fprint(2, "\t%.*sfirstsampleflags\t0x%ux\n", dind, ind, b->trun.firstsampleflags);
- for(u = 0; u < b->trun.samplecount; u++){
+ for(u = 0; dflag > 1 && u < b->trun.samplecount; u++){
fprint(2, "\t%.*ssamples[%zd]\n", dind, ind, u);
if(b->flags & 0x100)
- fprint(2, "\t\t%.*s.duration\t%ud\n", dind, ind, b->trun.samples[u].duration);
+ fprint(2, "\t\t%.*sduration\t%ud\n", dind, ind, b->trun.samples[u].duration);
if(b->flags & 0x200)
- fprint(2, "\t\t%.*s.size\t%ud\n", dind, ind, b->trun.samples[u].size);
+ fprint(2, "\t\t%.*ssize\t%ud\n", dind, ind, b->trun.samples[u].size);
if(b->flags & 0x400)
- fprint(2, "\t\t%.*s.flags\t0x%ux\n", dind, ind, b->trun.samples[u].flags);
+ fprint(2, "\t\t%.*sflags\t0x%ux\n", dind, ind, b->trun.samples[u].flags);
if(b->flags & 0x800)
- fprint(2, "\t\t%.*s.timeoffset\t%zd\n", dind, ind, b->trun.samples[u].timeoffset);
+ fprint(2, "\t\t%.*stimeoffset\t%zd\n", dind, ind, b->trun.samples[u].timeoffset);
}
}else if(b->type == BoxStsd){
- fprint(2, "\t%.*shandler_type\t%08x\n", dind, ind, b->stsd.handlertype);
fprint(2, "\t%.*sentry_count\t%ud\n", dind, ind, b->stsd.entrycount);
+ if(b->stsd.video.format != 0){
+ fprint(2, "\t\t%.*svideo\t%T\n", dind, ind, b->stsd.video.format);
+ fprint(2, "\t\t\t%.*swidth\t%d\n", dind, ind, b->stsd.video.width);
+ fprint(2, "\t\t\t%.*sheight\t%d\n", dind, ind, b->stsd.video.height);
+ }
+ if(b->stsd.audio.format != 0){
+ fprint(2, "\t\t%.*saudio\t%T\n", dind, ind, b->stsd.audio.format);
+ fprint(2, "\t\t\t%.*schannels\t%d\n", dind, ind, b->stsd.audio.channels);
+ fprint(2, "\t\t\t%.*ssample_rate\t%d\n", dind, ind, b->stsd.audio.samplerate);
+ }
}else if(b->type == BoxStts){
fprint(2, "\t%.*sentry_count\t%ud\n", dind, ind, b->stts.entrycount);
}else if(b->type == BoxStss){
@@ -320,7 +361,7 @@
fprint(2, "\t%.*sentry_count\t%ud\n", dind, ind, b->stss.entrycount);
}else if(b->type == BoxStsc){
fprint(2, "\t%.*sentry_count\t%ud\n", dind, ind, b->stss.entrycount);
- for(u = 0; u < b->stsc.entrycount; u++){
+ for(u = 0; dflag > 1 && u < b->stsc.entrycount; u++){
fprint(2, "\t%.*sentry[%zd]\n", dind, ind, u);
fprint(2, "\t\t%.*sfirst_chunk\t%ud\n", dind, ind, b->stsc.entry[u].firstchunk);
fprint(2, "\t\t%.*ssamples_per_chunk\t%ud\n", dind, ind, b->stsc.entry[u].samplesperchunk);
@@ -327,11 +368,11 @@
fprint(2, "\t\t%.*ssample_description_table\t%ud\n", dind, ind, b->stsc.entry[u].sdt);
}
}else if(b->type == BoxStsz){
- fprint(2, "\t%.*ssample_size\t%ud\n", dind, ind, b->stsz.samplesize);
+ fprint(2, "\t%.*ssample_size\t%ud\n", dind, ind, b->stsz.samplesizeeach);
fprint(2, "\t%.*ssample_count\t%ud\n", dind, ind, b->stsz.samplecount);
- if(b->stsz.samplesize == 0){
+ if(dflag > 1 && b->stsz.samplesizeeach == 0){
for(u = 0; u < b->stsz.samplecount; u++)
- fprint(2, "\t%.*sentrysize[%zd]\t%ud\n", dind, ind, u, b->stsz.entrysize[u]);
+ fprint(2, "\t%.*ssamplesize[%zd]\t%ud\n", dind, ind, u, b->stsz.samplesize[u]);
}
}else if(b->type == BoxTkhd){
fprint(2, "\t%.*screation_time\t%zd\n", dind, ind, b->tkhd.creattime);
@@ -341,59 +382,182 @@
fprint(2, "\t%.*swidth\t%ud\n", dind, ind, b->tkhd.width);
fprint(2, "\t%.*sheight\t%ud\n", dind, ind, b->tkhd.height);
}else if(b->type == BoxHdlr){
- fprint(2, "\t%.*shandler_type\t%c%c%c%c\n", dind, ind, b->hdlr.handlertype>>24, b->hdlr.handlertype>>16&0xff, b->hdlr.handlertype>>8&0xff, b->hdlr.handlertype&0xff);
+ fprint(2, "\t%.*shandler_type\t%T\n", dind, ind, b->hdlr.handlertype);
fprint(2, "\t%.*sname\t%s\n", dind, ind, b->hdlr.name);
}else if(b->type == BoxStco || b->type == BoxCo64){
fprint(2, "\t%.*sentry_count\t%ud\n", dind, ind, b->stss.entrycount);
- for(u = 0; u < b->stco_co64.entrycount; u++)
+ for(u = 0; dflag > 1 && u < b->stco_co64.entrycount; u++)
fprint(2, "\t%.*schunkoffset[%zd]\t%zd\n", dind, ind, u, b->stco_co64.chunkoffset[u]);
- }else{
- fprint(2, "\t%.*sstart\t%zd\n", dind, ind, b->dstart);
- fprint(2, "\t%.*ssize\t%zd\n", dind, ind, b->dsz);
+ }else if(dflag < 2){
+ fprint(2, "\t%.*soffset\t%zd\n", dind, ind, b->offset);
+ fprint(2, "\t%.*sdstart\t%zd\n", dind, ind, b->dstart);
+ fprint(2, "\t%.*sdsize\t%zd\n", dind, ind, b->dsz);
}
}
+/*
+ * Append a copy of the track being parsed (the global `track`) to `tracks`.
+ * The copy is shallow: the chunkoffset/samplesize/stc arrays are now owned
+ * by the new tracks[] element, so the caller must only clear `track`
+ * afterwards, not free its members. Exits on allocation failure.
+ */
static void
-dumptrack(Biobuf *f)
+addtrack(void)
{
+	tracks = realloc(tracks, (ntracks+1)*sizeof(*tracks));
+	if(tracks == nil)
+		sysfatal("addtrack: %r");
+	memmove(&tracks[ntracks++], &track, sizeof(track));
+}
+
+/*
+ * Sample rates accepted for mp4a tracks. dumptrack looks the track's
+ * sample rate up here and writes the matching array position into the
+ * third byte of every frame header it emits, so the order of the
+ * entries is significant.
+ */
+static int srate2mpeg4fi[] = {
+ 96000,
+ 88200,
+ 64000,
+ 48000,
+ 44100,
+ 32000,
+ 24000,
+ 22050,
+ 16000,
+ 12000,
+ 11025,
+ 8000,
+ 7350,
+};
+
+/*
+ * Copy the samples of the track whose id is `id` from f to standard output.
+ *
+ * A one-line summary of the track is printed on fd 2 first. For mp4a audio
+ * every sample is preceded by a 7-byte frame header built from the track's
+ * sample rate, channel count and the sample size; other audio formats and
+ * all video tracks are rejected.
+ *
+ * Returns 0 on success and -1 with the error string set on failure (unknown
+ * track, unsupported format, bad chunk/sample data, read error, no memory).
+ * A failed write to standard output stops the dump but is not reported as an
+ * error. The output buffer and the sample buffer are released on every path
+ * taken once the output buffer has been set up.
+ */
+static int
+dumptrack(Biobuf *f, int id)
+{
SampleToChunk *stc;
- u32int si, ch, lastch;
- u32int samplelast, sample, rawsz;
- vlong o;
+	u32int si, ch, nextch;
+	u32int samplelast, sample, rawsz, samplesz, framesz;
+	u64int o;
+	int i, res;
u8int *raw;
Biobuf out;
+	Track *t;
+	u8int frame[7];
+	for(t = tracks, i = 0; i < ntracks && t->id != id; i++, t++);
+	if(i >= ntracks){
+		werrstr("no track %d", id);
+		return -1;
+	}
+
+	fprint(
+		2,
+		"track %d: handler=%T numstc=%ud chunks=%ud samples=%ud\n", t->id, t->handlertype, t->numstc, t->numchunks, t->numsamples
+	);
+	if(t->audio.format != 0){
+		fprint(2, "audio: format=%T channels=%d samplerate=%d\n", t->audio.format, t->audio.channels, t->audio.samplerate);
+		if(t->audio.format == FmtMp4a){
+			for(i = 0; i < nelem(srate2mpeg4fi) && srate2mpeg4fi[i] != t->audio.samplerate; i++);
+			if(i >= nelem(srate2mpeg4fi)){
+				werrstr("audio: mpeg4: invalid sample rate %d", t->audio.samplerate);
+				return -1;
+			}
+			frame[0] = 0xff; /* syncword */
+			frame[1] = 0xf1; /* syncword, mpeg4, no crc */
+			/* only one channel bit belongs in this byte; mask it so it cannot spill into the frequency index */
+			frame[2] = 1<<6 | i<<2 | ((t->audio.channels>>2)&1); /* AAC(LC)??? FIXME, frequency index, channels */
+		}else{
+			werrstr("audio: unknown format %T", t->audio.format);
+			return -1;
+		}
+	}
+	if(t->video.format != 0){
+		fprint(2, "video: format=%T resolution=%dx%d\n", t->video.format, t->video.width, t->video.height);
+		werrstr("video: unknown format %T", t->video.format);
+		return -1;
+	}
+
+	/* from here on every exit goes through done: so out is always terminated */
+	res = -1;
Binit(&out, 1, OWRITE);
raw = nil;
rawsz = 0;
sample = samplelast = 0;
- stc = track.stc;
- for(si = 0; si < track.numstc; si++, stc++){
- lastch = si+1 < track.numstc ? stc[1].firstchunk : track.numchunks;
- for(ch = stc->firstchunk-1; ch < lastch && ch < track.numchunks; ch++){
- o = track.chunkoffset[ch];
- if(Bseek(f, o, 0) != o)
- sysfatal("chunk %ud: %r", ch);
- for(; sample < samplelast+stc->samplesperchunk && sample < track.numsamples; sample++){
- if(track.samplesize[sample] == 0)
+	stc = t->stc;
+	ch = 0;
+	for(si = 0; si < t->numstc; si++, stc++){
+		/*
+		 * This run ends where the next one starts, but never past the
+		 * chunk offset table: a bogus first_chunk must not index
+		 * beyond chunkoffset[numchunks-1].
+		 */
+		nextch = t->numchunks;
+		if(si+1 < t->numstc && stc[1].firstchunk-1 < nextch)
+			nextch = stc[1].firstchunk - 1;
+		for(; ch < nextch; ch++){
+			o = t->chunkoffset[ch];
+			if(Bseek(f, o, 0) != o){
+				werrstr("chunk %ud: %r", ch);
+				goto done;
+			}
+
+			for(; sample < samplelast+stc->samplesperchunk && sample < t->numsamples; sample++){
+				if((samplesz = t->samplesizeeach) == 0)
+					samplesz = t->samplesize[sample];
+				if(samplesz == 0)
break;
- if(rawsz < track.samplesize[sample]){
- rawsz = track.samplesize[sample] * 2;
+				/* samplesz*2 below must not wrap around */
+				if(samplesz > 0x7fffffff){
+					werrstr("chunk %ud sample %ud size %ud: too big", ch, sample, samplesz);
+					goto done;
+				}
+				if(rawsz < samplesz){
+					rawsz = samplesz * 2;
raw = realloc(raw, rawsz);
}
- if(Bread(f, raw, track.samplesize[sample]) != track.samplesize[sample])
- sysfatal("chunk %ud sample %ud size %ud: %r", ch, sample, track.samplesize[sample]);
- if(Bwrite(&out, raw, track.samplesize[sample]) != track.samplesize[sample])
- exits(nil);
+				if(raw == nil){
+					werrstr("chunk %ud sample %ud size %ud: %r", ch, sample, samplesz);
+					goto done;
+				}
+
+				if(Bread(f, raw, samplesz) != samplesz){
+					werrstr("chunk %ud sample %ud size %ud: %r", ch, sample, samplesz);
+					goto done;
+				}
+				if(t->audio.format == FmtMp4a){
+					/* the header stores header+payload length in 13 bits */
+					framesz = samplesz + 7;
+					if(framesz > 0x1fff){
+						werrstr("chunk %ud sample %ud size %ud: too big for a frame", ch, sample, samplesz);
+						goto done;
+					}
+					frame[3] = (t->audio.channels&3)<<6 | (framesz>>11)&3; /* channels, frame length */
+					frame[4] = (framesz>>3)&0xff; /* frame length */
+					frame[5] = (framesz&7)<<5 | 0x1f; /* frame length, fullness */
+					frame[6] = 0xfc; /* fullness, number of frames */
+					if(Bwrite(&out, frame, 7) != 7){ /* EOF */
+						werrstr("eof");
+						res = 0; /* reader went away: stop quietly */
+						goto done;
+					}
+				}
+				if(Bwrite(&out, raw, samplesz) != samplesz){ /* EOF? */
+					werrstr("eof");
+					res = 0; /* reader went away: stop quietly */
+					goto done;
+				}
}
samplelast = sample;
}
}
- fprint(2, "%ud samples\n", sample);
+	res = 0;
+done:
Bterm(&out);
free(raw);
+
+	return res;
};
+/*
+ * Parse the handler-specific part of one stsd sample entry.
+ *
+ * fmt is the entry's format code and n the number of entry bytes the
+ * caller has not consumed yet. Depending on the current track's handler
+ * type the video or audio description is stored in b->stsd and copied
+ * into the global `track`; for any other handler nothing is read and a
+ * note is printed on fd 2.
+ *
+ * Returns the number of entry bytes still unread, which the caller is
+ * expected to skip, or -1 if reading failed.
+ */
+int
+sampleentry(Biobuf *f, Box *b, u32int fmt, int n)
+{
+	u8int d[96];
+	vlong start;
+
+	start = Boffset(f);
+	if(track.handlertype == HandlerVideo){
+		b->stsd.video.format = fmt;
+
+		/* predefined+reserved+predefined, width+height, hres+vres, reserved, framecount, compressor */
+		eBread(2+2+4*3 + 2+2 + 4+4 + 4 + 2 + 32, "SampleEntry: video");
+		b->stsd.video.width = bu16(d+16);
+		b->stsd.video.height = bu16(d+18);
+
+		memmove(&track.video, &b->stsd.video, sizeof(Video));
+	}else if(track.handlertype == HandlerAudio){
+		b->stsd.audio.format = fmt;
+
+		/* reserved+id, ver+rev+vendor, channels+bps, ?+?, sample rate */
+		eBread(2+4+2 + 2+2 + 2+2 + 4, "SampleEntry: audio");
+		b->stsd.audio.channels = bu16(d+8);
+		b->stsd.audio.samplerate = bu32(d+16)>>16; /* keep the upper 16 bits of the field */
+
+		memmove(&track.audio, &b->stsd.audio, sizeof(Audio));
+		/* FIXME do we care about the rest? */
+	}else{
+		fprint(2, "SampleEntry: unknown handler type %T\n", track.handlertype);
+	}
+
+	/*
+	 * Subtract what was really read from the file, not sizeof(d):
+	 * the branches above consume 66, 20 or 0 bytes, never 96.
+	 */
+	return n - (int)(Boffset(f) - start);
+err:
+	return -1;
+}
+
static int
parseboxdata(Biobuf *f, Box *b)
{
@@ -432,14 +596,7 @@
break;
}
if(b->type == BoxTrak){
- if(track.id == trackdump){
- u = Boffset(f);
- dumptrack(f);
- Bseek(f, u, 0);
- }
- free(track.chunkoffset);
- free(track.samplesize);
- free(track.stc);
+ addtrack();
memset(&track, 0, sizeof(track));
}
dind--;
@@ -560,11 +717,17 @@
}
printbox(b);
}else if(b->type == BoxStsd){
- eBread(4, "handler_type");
- b->stsd.handlertype = bu32(d);
eBread(4, "entry_count");
b->stsd.entrycount = bu32(d);
- /* FIXME not reading actual entries here */
+ for(u = 0; u < b->stsd.entrycount; u++){
+ eBread(4, "size");
+ n = bu32(d);
+ eBread(4, "format");
+ Bseek(f, 6+2, 1); /* skip reserved+id */
+ n -= 8 + 6+2;
+ n = sampleentry(f, b, bu32(d), n);
+ Bseek(f, n, 1);
+ }
printbox(b);
}else if(b->type == BoxStts){
eBread(4, "entry_count");
@@ -604,17 +767,18 @@
printbox(b);
}else if(b->type == BoxStsz){
eBread(4, "sample_size");
- b->stsz.samplesize = bu32(d);
+ b->stsz.samplesizeeach = bu32(d);
+ track.samplesizeeach = b->stsz.samplesizeeach;
eBread(4, "sample_count");
b->stsz.samplecount = bu32(d);
- if(b->stsz.samplesize == 0){
- b->stsz.entrysize = calloc(b->stsz.samplecount, sizeof(u32int));
+ if(b->stsz.samplesizeeach == 0){
+ b->stsz.samplesize = calloc(b->stsz.samplecount, sizeof(u32int));
for(u = 0; u < b->stsz.samplecount; u++){
eBread(4, "chunk_offset");
- b->stsz.entrysize[u] = bu32(d);
+ b->stsz.samplesize[u] = bu32(d);
}
track.numsamples = b->stsz.samplecount;
- track.samplesize = b->stsz.entrysize;
+ track.samplesize = b->stsz.samplesize;
}
printbox(b);
}else if(b->type == BoxTkhd){
@@ -653,6 +817,7 @@
eBread(4, "pre_defined");
eBread(4, "handler_type");
b->hdlr.handlertype = bu32(d);
+ track.handlertype = b->hdlr.handlertype;
eBread(3*4, "reserved");
for(u = 0; u < sizeof(d)-1; u++){
if(Bread(f, d+u, 1) != 1){
@@ -683,7 +848,7 @@
int r;
*eof = 0;
- b->dstart = start = Boffset(f);
+ b->dstart = b->offset = start = Boffset(f);
if((r = Bread(f, d, 8)) != 8){
if(r == 0)
*eof = 1;
@@ -739,17 +904,19 @@
exits("usage");
}
-int
+void
main(int argc, char **argv)
{
- Biobuf *f;
Box b;
- int eof;
+ Biobuf *f;
+ char *status;
+ int eof, trackdump;
dflag = 0;
+ trackdump = -1;
ARGBEGIN{
case 'd':
- dflag = 1;
+ dflag++;
break;
case 't':
trackdump = atoi(EARGF(usage()));
@@ -758,7 +925,8 @@
fmtinstall('T', typefmt);
- for(; *argv; argv++){
+ status = nil;
+ for(; *argv && status == nil; argv++){
if((f = Bopen(*argv, OREAD)) == nil)
sysfatal("%s: %r", *argv);
@@ -773,8 +941,13 @@
Bseek(f, b.dstart+b.dsz, 0);
}
+ if(trackdump >= 0 && dumptrack(f, trackdump) != 0){
+ fprint(2, "%s: %r\n", *argv);
+ status = "dump";
+ }
+
Bterm(f);
}
- return 0;
+ exits(status);
}
--
⑨