shithub: opusfile

Download patch

ref: 661459b4aad3511e28976d8dfd84e02b36cc3d63
parent: 33d179715a45f8171408ae7ba0de8d5a377ae668
author: Timothy B. Terriberry <tterribe@xiph.org>
date: Tue Dec 15 08:56:34 EST 2015

Buffer continued packet data to reduce seeking.

In commit 41c29626afbd I claimed that it was not worth the
 machinery to buffer an extra page to avoid a seek when we have
 continued packet data.
That was a little unsatisfying, considering how much effort we make
 to avoid unnecessary seeking elsewhere, but in the general case,
 we might have to buffer an arbitrary number of pages, since a
 packet can span several.
However, we already have the mechanism to do this buffering: the
 ogg_stream_state.
There are a number of potentially nasty corner-cases, but libogg's
 page sequence number tracking prevents us from accidentally gluing
 extraneous packet data onto some other unsuspecting packet, so I
 believe the chance of introducing new bugs here is manageable.

This reduces the number of seeks in Simon Jackson's continued-page
 test case by over 23%.

This also means we can handle pages without useful timestamps
 (including those multiplexed from another stream) between the last
 timestamped page at or before our target and the first timestamped
 page after our target without any additional seeks.
Previously we would scan all of this data, see that the
 'page_offset' of the most recent page we read was way beyond
 'best' (the end of the last timestamped page before our target),
 and then seek back and scan it all again.
This should greatly reduce the number of seeks we need in
 multiplexed streams, even if there are no continued packets.

--- a/src/internal.h
+++ b/src/internal.h
@@ -186,6 +186,11 @@
   opus_int32         cur_discard_count;
   /*The granule position of the previous packet (current packet start time).*/
   ogg_int64_t        prev_packet_gp;
+  /*The stream offset of the most recent page with completed packets, or -1.
+    This is only needed to recover continued packet data in the seeking logic,
+     when we use the current position as one of our bounds, only to later
+     discover it was the correct starting point.*/
+  opus_int64         prev_page_offset;
   /*The number of bytes read since the last bitrate query, including framing.*/
   opus_int64         bytes_tracked;
   /*The number of samples decoded since the last bitrate query.*/
--- a/src/opusfile.c
+++ b/src/opusfile.c
@@ -827,6 +827,7 @@
 static int op_find_initial_pcm_offset(OggOpusFile *_of,
  OggOpusLink *_link,ogg_page *_og){
   ogg_page     og;
+  opus_int64   page_offset;
   ogg_int64_t  pcm_start;
   ogg_int64_t  prev_packet_gp;
   ogg_int64_t  cur_page_gp;
@@ -844,13 +845,12 @@
      least once.*/
   total_duration=0;
   do{
-    opus_int64 llret;
-    llret=op_get_next_page(_of,_og,_of->end);
+    page_offset=op_get_next_page(_of,_og,_of->end);
     /*We should get a page unless the file is truncated or mangled.
       Otherwise there are no audio data packets in the whole logical stream.*/
-    if(OP_UNLIKELY(llret<0)){
+    if(OP_UNLIKELY(page_offset<0)){
       /*Fail if there was a read error.*/
-      if(llret<OP_FALSE)return (int)llret;
+      if(page_offset<OP_FALSE)return (int)page_offset;
       /*Fail if the pre-skip is non-zero, since it's asking us to skip more
          samples than exist.*/
       if(_link->head.pre_skip>0)return OP_EBADTIMESTAMP;
@@ -951,6 +951,7 @@
   _of->op_count=pi;
   _of->cur_discard_count=_link->head.pre_skip;
   _of->prev_packet_gp=_link->pcm_start=pcm_start;
+  _of->prev_page_offset=page_offset;
   return 0;
 }
 
@@ -1404,6 +1405,7 @@
   ogg_sync_state    oy_start;
   ogg_stream_state  os_start;
   ogg_packet       *op_start;
+  opus_int64        prev_page_offset;
   opus_int64        start_offset;
   int               start_op_count;
   int               ret;
@@ -1423,6 +1425,7 @@
   if(op_start==NULL)return OP_EFAULT;
   *&oy_start=_of->oy;
   *&os_start=_of->os;
+  prev_page_offset=_of->prev_page_offset;
   start_offset=_of->offset;
   memcpy(op_start,_of->op,sizeof(*op_start)*start_op_count);
   OP_ASSERT((*_of->callbacks.tell)(_of->source)==op_position(_of));
@@ -1439,6 +1442,7 @@
   memcpy(_of->op,op_start,sizeof(*_of->op)*start_op_count);
   _ogg_free(op_start);
   _of->prev_packet_gp=_of->links[0].pcm_start;
+  _of->prev_page_offset=prev_page_offset;
   _of->cur_discard_count=_of->links[0].head.pre_skip;
   if(OP_UNLIKELY(ret<0))return ret;
   /*And restore the position indicator.*/
@@ -1453,6 +1457,7 @@
   _of->op_count=0;
   _of->od_buffer_size=0;
   _of->prev_packet_gp=-1;
+  _of->prev_page_offset=-1;
   if(!_of->seekable){
     OP_ASSERT(_of->ready_state>=OP_INITSET);
     opus_tags_clear(&_of->links[0].tags);
@@ -1817,7 +1822,8 @@
            0) Need more data (only if _readp==0).
            1) Got at least one audio data packet.*/
 static int op_fetch_and_process_page(OggOpusFile *_of,
- ogg_page *_og,opus_int64 _page_pos,int _readp,int _spanp,int _ignore_holes){
+ ogg_page *_og,opus_int64 _page_offset,
+ int _readp,int _spanp,int _ignore_holes){
   OggOpusLink  *links;
   ogg_uint32_t  cur_serialno;
   int           seekable;
@@ -1845,9 +1851,9 @@
         _og=NULL;
       }
       /*Keep reading until we get a page with the correct serialno.*/
-      else _page_pos=op_get_next_page(_of,&og,_of->end);
+      else _page_offset=op_get_next_page(_of,&og,_of->end);
       /*EOF: Leave uninitialized.*/
-      if(_page_pos<0)return _page_pos<OP_FALSE?(int)_page_pos:OP_EOF;
+      if(_page_offset<0)return _page_offset<OP_FALSE?(int)_page_offset:OP_EOF;
       if(OP_LIKELY(_of->ready_state>=OP_STREAMSET)){
         if(cur_serialno!=(ogg_uint32_t)ogg_page_serialno(&og)){
           /*Two possibilities:
@@ -1892,8 +1898,9 @@
         _of->ready_state=OP_STREAMSET;
         /*If we're at the start of this link, initialize the granule position
            and pre-skip tracking.*/
-        if(_page_pos<=links[cur_link].data_offset){
+        if(_page_offset<=links[cur_link].data_offset){
           _of->prev_packet_gp=links[cur_link].pcm_start;
+          _of->prev_page_offset=-1;
           _of->cur_discard_count=links[cur_link].head.pre_skip;
           /*Ignore a hole at the start of a new link (this is common for
              streams joined in the middle) or after seeking.*/
@@ -2071,6 +2078,7 @@
           OP_ASSERT(total_duration==0);
         }
         _of->prev_packet_gp=prev_packet_gp;
+        _of->prev_page_offset=_page_offset;
         _of->op_count=pi;
         /*If end-trimming didn't trim all the packets, we're done.*/
         if(OP_LIKELY(pi>0))return 1;
@@ -2146,7 +2154,7 @@
 
 /*A small helper to determine if an Ogg page contains data that continues onto
    a subsequent page.*/
-static int op_page_continues(ogg_page *_og){
+static int op_page_continues(const ogg_page *_og){
   int nlacing;
   OP_ASSERT(_og->header_len>=27);
   nlacing=_og->header[26];
@@ -2156,6 +2164,15 @@
   return _og->header[27+nlacing-1]==255;
 }
 
+/*A small helper to buffer the continued packet data from a page.*/
+static void op_buffer_continued_data(OggOpusFile *_of,ogg_page *_og){
+  ogg_packet op;
+  ogg_stream_pagein(&_of->os,_og);
+  /*Drain any packets that did end on this page (and ignore holes).
+    We only care about the continued packet data.*/
+  while(ogg_stream_packetout(&_of->os,&op));
+}
+
 /*This controls how close the target has to be to use the current stream
    position to subdivide the initial range.
   Two minutes seems to be a good default.*/
@@ -2193,7 +2210,7 @@
   opus_int64         d1;
   opus_int64         d2;
   int                force_bisect;
-  int                reset_needed;
+  int                buffering;
   int                ret;
   _of->bytes_tracked=0;
   _of->samples_tracked=0;
@@ -2203,7 +2220,7 @@
   serialno=link->serialno;
   best=best_start=begin=link->data_offset;
   page_offset=-1;
-  reset_needed=1;
+  buffering=0;
   /*We discard the first 80 ms of data after a seek, so seek back that much
      farther.
     If we can't, simply seek to the beginning of the link.*/
@@ -2247,11 +2264,20 @@
           if(diff<0){
             OP_ASSERT(offset>=begin);
             if(offset-begin>=end-begin>>1||diff>-OP_CUR_TIME_THRESH){
-              best=best_start=begin=offset;
+              best=begin=offset;
               best_gp=pcm_start=gp;
-              /*Don't reset the Ogg stream state, or we'll dump any continued
-                 packets from previous pages.*/
-              reset_needed=0;
+              /*If we have buffered data from a continued packet, remember the
+                 offset of the previous page's start, so that if we do wind up
+                 having to seek back here later, we can prime the stream with
+                 the continued packet data.
+                With no continued packet, we remember the end of the page.*/
+              best_start=_of->os.body_returned<_of->os.body_fill?
+               _of->prev_page_offset:best;
+              /*If there's completed packets and data in the stream state,
+                 prev_page_offset should always be set.*/
+              OP_ASSERT(best_start>=0);
+              /*Buffer any continued packet data starting from here.*/
+              buffering=1;
             }
           }
           else{
@@ -2270,6 +2296,7 @@
               _of->op_pos=0;
               _of->od_buffer_size=0;
               _of->prev_packet_gp=prev_page_gp;
+              /*_of->prev_page_offset already points to the right place.*/
               _of->ready_state=OP_STREAMSET;
               return op_make_decode_ready(_of);
             }
@@ -2290,6 +2317,9 @@
      Vinen)" from libvorbisfile.
     It has been modified substantially since.*/
   op_decode_clear(_of);
+  if(!buffering)ogg_stream_reset_serialno(&_of->os,serialno);
+  _of->cur_link=_li;
+  _of->ready_state=OP_STREAMSET;
   /*Initialize the interval size history.*/
   d2=d1=d0=end-begin;
   force_bisect=0;
@@ -2315,6 +2345,9 @@
       force_bisect=0;
     }
     if(bisect!=_of->offset){
+      /*Discard any buffered continued packet data.*/
+      if(buffering)ogg_stream_reset(&_of->os);
+      buffering=0;
       page_offset=-1;
       ret=op_seek_helper(_of,bisect);
       if(OP_UNLIKELY(ret<0))return ret;
@@ -2321,6 +2354,14 @@
     }
     chunk_size=OP_CHUNK_SIZE;
     next_boundary=boundary;
+    /*Now scan forward and figure out where we landed.
+      In the ideal case, we will see a page with a granule position at or
+       before our target, followed by a page with a granule position after our
+       target (or the end of the search interval).
+      Then we can just drop out and will have all of the data we need with no
+       additional seeking.
+      If we landed too far before, or after, we'll break out and do another
+       bisection.*/
     while(begin<end){
       page_offset=op_get_next_page(_of,&og,boundary);
       if(page_offset<0){
@@ -2330,7 +2371,10 @@
         /*If we scanned the whole interval, we're done.*/
         if(bisect<=begin+1)end=begin;
         else{
-          /*Otherwise, back up one chunk.*/
+          /*Otherwise, back up one chunk.
+            First, discard any data from a continued packet.*/
+          if(buffering)ogg_stream_reset(&_of->os);
+          buffering=0;
           bisect=OP_MAX(bisect-chunk_size,begin);
           ret=op_seek_helper(_of,bisect);
           if(OP_UNLIKELY(ret<0))return ret;
@@ -2343,15 +2387,32 @@
       }
       else{
         ogg_int64_t gp;
+        int         has_packets;
         /*Save the offset of the first page we found after the seek, regardless
            of the stream it came from or whether or not it has a timestamp.*/
         next_boundary=OP_MIN(page_offset,next_boundary);
         if(serialno!=(ogg_uint32_t)ogg_page_serialno(&og))continue;
-        /*Ignore the granule position on pages where no packets end.
-          Otherwise we wouldn't properly track continued packets through
-           them.*/
-        gp=ogg_page_packets(&og)>0?ogg_page_granulepos(&og):-1;
-        if(gp==-1)continue;
+        has_packets=ogg_page_packets(&og)>0;
+        /*Force the gp to -1 (as it should be per spec) if no packets end on
+           this page.
+          Otherwise we might get confused when we try to pull out a packet
+           with that timestamp and can't find it.*/
+        gp=has_packets?ogg_page_granulepos(&og):-1;
+        if(gp==-1){
+          if(buffering){
+            if(OP_LIKELY(!has_packets))ogg_stream_pagein(&_of->os,&og);
+            else{
+              /*If packets did end on this page, but we still didn't have a
+                 valid granule position (in violation of the spec!), stop
+                 buffering continued packet data.
+                Otherwise we might continue past the packet we actually
+                 wanted.*/
+              ogg_stream_reset(&_of->os);
+              buffering=0;
+            }
+          }
+          continue;
+        }
         if(op_granpos_cmp(gp,_target_gp)<0){
           /*We found a page that ends before our target.
             Advance to the raw offset of the next page.*/
@@ -2364,16 +2425,22 @@
           }
           /*Save the byte offset of the end of the page with this granule
              position.*/
-          best=begin;
-          /*If this page ends with a continued packet, remember the offset of
-             its start, so that we can prime the stream with the continued
-             packet data.
-            This will likely mean an extra seek backwards, since chances are
-             we will scan forward at least one more page to figure out where
-             we're stopping, but continued packets are rare enough it's not
-             worth the machinery to buffer two pages instead of one to avoid
-             that seek.*/
-          best_start=op_page_continues(&og)?page_offset:best;
+          best=best_start=begin;
+          /*Buffer any data from a continued packet, if necessary.
+            This avoids the need to seek back here if the next timestamp we
+             encounter while scanning forward lies after our target.*/
+          if(buffering)ogg_stream_reset(&_of->os);
+          if(op_page_continues(&og)){
+            op_buffer_continued_data(_of,&og);
+            /*If we have a continued packet, remember the offset of this
+               page's start, so that if we do wind up having to seek back here
+               later, we can prime the stream with the continued packet data.
+              With no continued packet, we remember the end of the page.*/
+            best_start=page_offset;
+          }
+          /*Then force buffering on, so that if a packet starts (but does not
+             end) on the next page, we still avoid the extra seek back.*/
+          buffering=1;
           best_gp=pcm_start=gp;
           OP_ALWAYS_TRUE(!op_granpos_diff(&diff,_target_gp,pcm_start));
           /*If we're more than a second away from our target, break out and
@@ -2405,37 +2472,35 @@
       }
     }
   }
-  /*Found our page.
-    The packets we want will end on pages that come after best, but the first
-     packet might be continued from data in the best page itself.
-    In that case, best_start will point to the start of the best page, rather
-     than the end, because that's where we need to start reading.
-    When there is no danger of a continued packet, best_start==best.*/
-  /*Seek, if necessary.*/
-  if(best_start!=page_offset){
-    page_offset=-1;
-    ret=op_seek_helper(_of,best_start);
-    if(OP_UNLIKELY(ret<0))return ret;
-  }
+  /*Found our page.*/
   OP_ASSERT(op_granpos_cmp(best_gp,pcm_start)>=0);
-  _of->cur_link=_li;
-  _of->ready_state=OP_STREAMSET;
-  /*Update prev_packet_gp to allow per-packet granule position assignment.
-    If best_start!=best, this is often wrong, but we'll be overwriting it
-     again shortly.*/
+  /*Seek, if necessary.
+    If we were buffering data from a continued packet, we should be able to
+     continue to scan forward to get the rest of the data (even if
+     page_offset==-1).
+    Otherwise, we need to seek back to best_start.*/
+  if(!buffering){
+    if(best_start!=page_offset){
+      page_offset=-1;
+      ret=op_seek_helper(_of,best_start);
+      if(OP_UNLIKELY(ret<0))return ret;
+    }
+    if(best_start<best){
+      /*Retrieve the page at best_start, if we do not already have it.*/
+      if(page_offset<0){
+        page_offset=op_get_next_page(_of,&og,link->end_offset);
+        if(OP_UNLIKELY(page_offset<OP_FALSE))return (int)page_offset;
+        if(OP_UNLIKELY(page_offset!=best_start))return OP_EBADLINK;
+      }
+      op_buffer_continued_data(_of,&og);
+      page_offset=-1;
+    }
+  }
+  /*Update prev_packet_gp to allow per-packet granule position assignment.*/
   _of->prev_packet_gp=best_gp;
-  if(reset_needed)ogg_stream_reset_serialno(&_of->os,serialno);
+  _of->prev_page_offset=best_start;
   ret=op_fetch_and_process_page(_of,page_offset<0?NULL:&og,page_offset,1,0,1);
   if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
-  if(best_start<best){
-    /*The previous call merely primed the stream state to make sure we got the
-       data for a continued packet.
-      Now load the packets we actually wanted.*/
-    _of->prev_packet_gp=best_gp;
-    _of->op_pos=_of->op_count;
-    ret=op_fetch_and_process_page(_of,NULL,-1,1,0,1);
-    if(OP_UNLIKELY(ret<=0))return OP_EBADLINK;
-  }
   /*Verify result.*/
   if(OP_UNLIKELY(op_granpos_cmp(_of->prev_packet_gp,_target_gp)>0)){
     return OP_EBADLINK;