shithub: libvpx

ref: 5fd142e763ce2760ff118b91b92714f1978e3d0b
parent: d7947c37a8ad9155c187d2b18421ca71c86a0b86
parent: 65732c36a8c8f60907b62d24c097782a0a0d7e1c
author: Paul Wilkins <paulwilkins@google.com>
date: Wed May 11 12:25:25 EDT 2016

Merge "Fixed 8K two pass encoder crash."

--- a/vp10/encoder/firstpass.c
+++ b/vp10/encoder/firstpass.c
@@ -45,7 +45,6 @@
 
 #define BOOST_BREAKOUT      12.5
 #define BOOST_FACTOR        12.5
-#define ERR_DIVISOR         128.0
 #define FACTOR_PT_LOW       0.70
 #define FACTOR_PT_HIGH      0.90
 #define FIRST_PASS_Q        10.0
@@ -231,6 +230,13 @@
   section->duration   -= frame->duration;
 }
 
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0  // 1920x1080
+static double get_linear_size_factor(const VP10_COMP *cpi) {
+  const double this_area = cpi->initial_width * cpi->initial_height;
+  return pow(this_area / BASE_SIZE, 0.5);
+}
+
 // Calculate an active area of the image that discounts formatting
 // bars and partially discounts other 0 energy areas.
 #define MIN_ACTIVE_AREA 0.5
@@ -1103,11 +1109,7 @@
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
+#define ERR_DIVISOR         100.0
 static int get_twopass_worst_quality(const VP10_COMP *cpi,
                                      const double section_err,
                                      double inactive_zone,
@@ -1126,11 +1128,21 @@
     const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
     const double av_err_per_mb = section_err / active_mbs;
     const double speed_term = 1.0 + 0.04 * oxcf->speed;
-    const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+    double ediv_size_correction;
     const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
                                          BPER_MB_NORMBITS) / active_mbs;
-
     int q;
+
+    // Larger image formats are expected to be a little harder to code
+    // relatively given the same prediction error score. This in part at
+    // least relates to the increased size and hence coding overheads of
+    // motion vectors. Some account of this is made through adjustment of
+    // the error divisor.
+    ediv_size_correction =
+        VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+    if (ediv_size_correction < 1.0)
+      ediv_size_correction = -(1.0 / ediv_size_correction);
+    ediv_size_correction *= 4.0;
 
     // Try and pick a max Q that will be high enough to encode the
     // content at the given rate.
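
The replacement correction scales with linear image dimensions rather than macroblock count: the square root of the pixel-area ratio against 1080p, clamped to [0.2, 5.0], mapped through a reciprocal below 1080p so that smaller formats raise the divisor instead of lowering it, then scaled by 4. A sketch, again assuming the ERR_DIVISOR - ediv_size_correction call site, of what this yields across common formats:

    /* Sketch only: evaluates the patched correction at a few formats. */
    #include <math.h>
    #include <stdio.h>

    static double new_correction(double w, double h) {
      double f = sqrt((w * h) / 2073600.0); /* linear size vs 1920x1080 */
      f = fmax(0.2, fmin(5.0, f));          /* clamp as in the patch */
      if (f < 1.0) f = -(1.0 / f);          /* reciprocal below 1080p */
      return f * 4.0;
    }

    int main(void) {
      /*  640x360:  factor 0.33 -> correction -12.0 -> divisor 112.0
       * 1920x1080: factor 1.00 -> correction  +4.0 -> divisor  96.0
       * 3840x2160: factor 2.00 -> correction  +8.0 -> divisor  92.0
       * 7680x4320: factor 4.00 -> correction +16.0 -> divisor  84.0
       * The clamp bounds the correction to [-20, +20], so the divisor
       * stays within [80, 120] and can no longer go negative. */
      printf("8K divisor: %.1f\n", 100.0 - new_correction(7680, 4320));
      return 0;
    }
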
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -45,7 +45,6 @@
 
 #define BOOST_BREAKOUT      12.5
 #define BOOST_FACTOR        12.5
-#define ERR_DIVISOR         128.0
 #define FACTOR_PT_LOW       0.70
 #define FACTOR_PT_HIGH      0.90
 #define FIRST_PASS_Q        10.0
@@ -237,6 +236,13 @@
   section->duration   -= frame->duration;
 }
 
+// Calculate the linear size relative to a baseline of 1080P
+#define BASE_SIZE 2073600.0  // 1920x1080
+static double get_linear_size_factor(const VP9_COMP *cpi) {
+  const double this_area = cpi->initial_width * cpi->initial_height;
+  return pow(this_area / BASE_SIZE, 0.5);
+}
+
 // Calculate an active area of the image that discounts formatting
 // bars and partially discounts other 0 energy areas.
 #define MIN_ACTIVE_AREA 0.5
@@ -1241,11 +1247,7 @@
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-// Larger image formats are expected to be a little harder to code relatively
-// given the same prediction error score. This in part at least relates to the
-// increased size and hence coding cost of motion vectors.
-#define EDIV_SIZE_FACTOR 800
-
+#define ERR_DIVISOR         100.0
 static int get_twopass_worst_quality(const VP9_COMP *cpi,
                                      const double section_err,
                                      double inactive_zone,
@@ -1267,10 +1269,9 @@
     const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
     const double av_err_per_mb = section_err / active_mbs;
     const double speed_term = 1.0 + 0.04 * oxcf->speed;
-    const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
+    double ediv_size_correction;
     const int target_norm_bits_per_mb = ((uint64_t)target_rate <<
                                          BPER_MB_NORMBITS) / active_mbs;
-
     int q;
     int is_svc_upper_layer = 0;
 
@@ -1277,6 +1278,16 @@
     if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0)
       is_svc_upper_layer = 1;
 
+    // Larger image formats are expected to be a little harder to code
+    // relatively given the same prediction error score. This in part at
+    // least relates to the increased size and hence coding overheads of
+    // motion vectors. Some account of this is made through adjustment of
+    // the error divisor.
+    ediv_size_correction =
+        VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
+    if (ediv_size_correction < 1.0)
+      ediv_size_correction = -(1.0 / ediv_size_correction);
+    ediv_size_correction *= 4.0;
 
     // Try and pick a max Q that will be high enough to encode the
     // content at the given rate.
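
The vp9 hunks apply the same change; the extra context there only reflects the two pass SVC handling around the call. For reference, a simplified reconstruction of the function that consumes the bounded divisor. Its body is inferred from the context lines (the fclamp/pow tail visible at the top of the later hunks in both files), so the signature and helper here are assumptions, not libvpx's exact code:

    /* Sketch only: simplified stand-in for calc_correction_factor(), whose
     * closing fclamp/pow line appears as context in both files above. */
    #include <math.h>

    static double fclamp(double x, double low, double high) {
      return x < low ? low : (x > high ? high : x);
    }

    static double calc_correction_factor(double err_per_mb,
                                         double err_divisor,
                                         double power_term) {
      const double error_term = err_per_mb / err_divisor;
      /* With err_divisor now confined to roughly [80, 120], error_term is
       * never negative, pow() never sees a negative base, and the factor
       * stays inside its intended [0.05, 5.0] range. */
      return fclamp(pow(error_term, power_term), 0.05, 5.0);
    }

    int main(void) {
      /* Hypothetical inputs: 50.0 err/mb against the new 8K divisor. */
      return calc_correction_factor(50.0, 84.0, 0.70) > 0.0 ? 0 : 1;
    }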