shithub: libvpx

Download patch

ref: 78071b3b97e0fd227d94bf8046c21bcd3ee7c433
parent: 8144132866650d7793a8249f3c377f8bf03e9242
author: Attila Nagy <attilanagy@google.com>
date: Fri Sep 16 09:54:06 EDT 2011

Multithreaded encoder, late sync loopfilter

Second shot at this...

Sync with loopfilter thread as late as possible, usually just at the
beginning of next frame encoding. This returns control to application
faster and allows a better multicore scaling.

When PSNR packets are generated the final filtered frame is needed
imediatly so we cannot delay the sync. Same has to be done when
internal frame is previewed.

Change-Id: I64e110c8b224dd967faefffd9c93dd8dbad4a5b5

--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -24,9 +24,9 @@
 extern void vp8_build_block_offsets(MACROBLOCK *x);
 extern void vp8_setup_block_ptrs(MACROBLOCK *x);
 
-extern void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
+extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
 
-static THREAD_FUNCTION loopfilter_thread(void *p_data)
+static THREAD_FUNCTION thread_loopfilter(void *p_data)
 {
     VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
     VP8_COMMON *cm = &cpi->common;
@@ -41,7 +41,7 @@
             if (cpi->b_multi_threaded == 0) // we're shutting down
                 break;
 
-            loopfilter_frame(cpi, cm);
+            vp8_loopfilter_frame(cpi, cm);
 
             sem_post(&cpi->h_event_end_lpf);
         }
@@ -468,6 +468,7 @@
 
     cpi->b_multi_threaded = 0;
     cpi->encoding_thread_count = 0;
+    cpi->b_lpf_running = 0;
 
     if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
     {
@@ -526,7 +527,7 @@
             sem_init(&cpi->h_event_end_lpf, 0, 0);
 
             lpfthd->ptr1 = (void *)cpi;
-            pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
+            pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd);
         }
     }
 
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3086,7 +3086,7 @@
     }
 }
 
-void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
+void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
 {
     if (cm->no_lpf)
     {
@@ -3716,6 +3716,15 @@
             vp8_setup_key_frame(cpi);
         }
 
+#if CONFIG_MULTITHREAD
+        /*  wait for the last picture loopfilter thread done */
+        if (cpi->b_lpf_running)
+        {
+            sem_wait(&cpi->h_event_end_lpf);
+            cpi->b_lpf_running = 0;
+        }
+#endif
+
         // transform / motion compensation build reconstruction frame
         vp8_encode_frame(cpi);
 
@@ -4074,11 +4083,12 @@
     if (cpi->b_multi_threaded)
     {
         sem_post(&cpi->h_event_start_lpf); /* start loopfilter in separate thread */
+        cpi->b_lpf_running = 1;
     }
     else
 #endif
     {
-        loopfilter_frame(cpi, cm);
+        vp8_loopfilter_frame(cpi, cm);
     }
 
     update_reference_frames(cm);
@@ -4098,10 +4108,11 @@
     vp8_pack_bitstream(cpi, dest, dest_end, size);
 
 #if CONFIG_MULTITHREAD
-    /* wait for loopfilter thread done */
-    if (cpi->b_multi_threaded)
+    /* if PSNR packets are generated we have to wait for the lpf */
+    if (cpi->b_lpf_running && cpi->b_calculate_psnr)
     {
         sem_wait(&cpi->h_event_end_lpf);
+        cpi->b_lpf_running = 0;
     }
 #endif
 
@@ -5107,6 +5118,15 @@
     else
     {
         int ret;
+
+#if CONFIG_MULTITHREAD
+        if(cpi->b_lpf_running)
+        {
+            sem_wait(&cpi->h_event_end_lpf);
+            cpi->b_lpf_running = 0;
+        }
+#endif
+
 #if CONFIG_POSTPROC
         ret = vp8_post_proc_frame(&cpi->common, dest, flags);
 #else
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -526,6 +526,7 @@
     int mt_sync_range;
     int b_multi_threaded;
     int encoding_thread_count;
+    int b_lpf_running;
 
     pthread_t *h_encoding_thread;
     pthread_t h_filter_thread;