shithub: libvpx

Download patch

ref: 74e8bd11c287ac3c0d10dc832de8c2da38dbb7a9
parent: 4d9dbb2ae81f2895101b94e6630ff394618b7de7
parent: 50e02b947a37ffd8b84ea492c3207fe79b01bbc2
author: John Koleszar <jkoleszar@google.com>
date: Wed Apr 3 17:07:53 EDT 2013

Merge "Adding decode_sb_16x16 function." into experimental

--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -486,6 +486,60 @@
   }
 }
 
+static INLINE void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
+  const int y_count = y_size * y_size;  /* number of Y blocks */
+  const int uv_size = y_size / 2;  /* U/V blocks per side */
+  const int uv_count = uv_size * uv_size;
+  /* Calls the 16x16 dequant/idct-add routines on every Y, U and V block. */
+  const int u_qcoeff_offset = (16 * 16) * y_count;  /* U follows all Y */
+  const int v_qcoeff_offset = u_qcoeff_offset + (16 * 16) * uv_count;
+  const int u_eob_offset = 16 * y_count;  /* eobs: 16 entries per block */
+  const int v_eob_offset = u_eob_offset + 16 * uv_count;
+  int n;
+  /* Y plane: blocks are visited in raster order, one call per block. */
+  for (n = 0; n < y_count; n++) {
+    const int x_idx = n % y_size;  /* block column */
+    const int y_idx = n / y_size;  /* block row */
+    const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
+    const TX_TYPE tx_type = get_tx_type_16x16(mb, (y_idx * (4 * y_size) +
+                                                   x_idx) * 4);
+    if (tx_type == DCT_DCT) {  /* DCT_DCT: routine without tx_type */
+      vp9_dequant_idct_add_16x16(mb->qcoeff + n * 16 * 16,
+                                 mb->block[0].dequant ,
+                                 mb->dst.y_buffer + y_offset,
+                                 mb->dst.y_buffer + y_offset,
+                                 mb->dst.y_stride, mb->dst.y_stride,
+                                 mb->eobs[n * 16]);
+    } else {  /* otherwise the ht variant, which takes tx_type */
+      vp9_ht_dequant_idct_add_16x16_c(tx_type,
+                                      mb->qcoeff + n * 16 * 16,
+                                      mb->block[0].dequant,
+                                      mb->dst.y_buffer + y_offset,
+                                      mb->dst.y_buffer + y_offset,
+                                      mb->dst.y_stride, mb->dst.y_stride,
+                                      mb->eobs[n * 16]);
+    }
+  }
+  /* U and V: always the non-ht routine; both planes share uv_offset. */
+  for (n = 0; n < uv_count; n++) {
+    const int x_idx = n % uv_size;
+    const int y_idx = n / uv_size;
+    const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
+    vp9_dequant_idct_add_16x16(mb->qcoeff + u_qcoeff_offset + n * 16 * 16,
+                               mb->block[16].dequant,
+                               mb->dst.u_buffer + uv_offset,
+                               mb->dst.u_buffer + uv_offset,
+                               mb->dst.uv_stride, mb->dst.uv_stride,
+                               mb->eobs[u_eob_offset + n * 16]);
+    vp9_dequant_idct_add_16x16(mb->qcoeff + v_qcoeff_offset + n * 16 * 16,
+                               mb->block[20].dequant,
+                               mb->dst.v_buffer + uv_offset,
+                               mb->dst.v_buffer + uv_offset,
+                               mb->dst.uv_stride, mb->dst.uv_stride,
+                               mb->eobs[v_eob_offset + n * 16]);
+  }
+}
+
 static INLINE void decode_sb_8x8(MACROBLOCKD *mb, int y_size) {
   const int y_count = y_size * y_size;
   const int uv_size = y_size / 2;
@@ -503,9 +557,9 @@
     const int y_idx = n / y_size;
     const int y_offset = (y_idx * 8) * mb->dst.y_stride + (x_idx * 8);
     const TX_TYPE tx_type = get_tx_type_8x8(mb,
-                                            (y_idx * 2 * y_size + x_idx) * 2);
+                                            (y_idx * (2 * y_size) + x_idx) * 2);
     if (tx_type == DCT_DCT) {
-      vp9_dequant_idct_add_8x8_c(mb->qcoeff + n * 64,
+      vp9_dequant_idct_add_8x8_c(mb->qcoeff + n * 8 * 8,
                                  mb->block[0].dequant,
                                  mb->dst.y_buffer + y_offset,
                                  mb->dst.y_buffer + y_offset,
@@ -512,7 +566,7 @@
                                  mb->dst.y_stride, mb->dst.y_stride,
                                  mb->eobs[n * 4]);
     } else {
-      vp9_ht_dequant_idct_add_8x8_c(tx_type, mb->qcoeff + n * 64,
+      vp9_ht_dequant_idct_add_8x8_c(tx_type, mb->qcoeff + n * 8 * 8,
                                     mb->block[0].dequant,
                                     mb->dst.y_buffer + y_offset,
                                     mb->dst.y_buffer + y_offset,
@@ -526,14 +580,13 @@
     const int x_idx = n % uv_size;
     const int y_idx = n / uv_size;
     const int uv_offset = (y_idx * 8) * mb->dst.uv_stride + (x_idx * 8);
-
-    vp9_dequant_idct_add_8x8_c(mb->qcoeff + u_qcoeff_offset + n * 64,
+    vp9_dequant_idct_add_8x8_c(mb->qcoeff + u_qcoeff_offset + n * 8 * 8,
                                mb->block[16].dequant,
                                mb->dst.u_buffer + uv_offset,
                                mb->dst.u_buffer + uv_offset,
                                mb->dst.uv_stride, mb->dst.uv_stride,
                                mb->eobs[u_eob_offset + n * 4]);
-    vp9_dequant_idct_add_8x8_c(mb->qcoeff + v_qcoeff_offset + n * 64,
+    vp9_dequant_idct_add_8x8_c(mb->qcoeff + v_qcoeff_offset + n * 8 * 8,
                                mb->block[20].dequant,
                                mb->dst.v_buffer + uv_offset,
                                mb->dst.v_buffer + uv_offset,
@@ -548,8 +601,8 @@
   const int uv_size = y_size / 2;
   const int uv_count = uv_size * uv_size;
 
-  const int u_qcoeff_offset = y_count * 16;
-  const int v_qcoeff_offset = u_qcoeff_offset + uv_count * 16;
+  const int u_qcoeff_offset = y_count * 4 * 4;
+  const int v_qcoeff_offset = u_qcoeff_offset + uv_count * 4 * 4;
   const int u_eob_offset = y_count;
   const int v_eob_offset = u_eob_offset + uv_count;
 
@@ -559,9 +612,9 @@
     const int x_idx = n % y_size;
     const int y_idx = n / y_size;
     const int y_offset = (y_idx * 4) * mb->dst.y_stride + (x_idx * 4);
-    const TX_TYPE tx_type = get_tx_type_4x4(mb, y_idx * (y_size*2) + x_idx);
+    const TX_TYPE tx_type = get_tx_type_4x4(mb, y_idx * (2 * y_size) + x_idx);
     if (tx_type == DCT_DCT) {
-      mb->itxm_add(mb->qcoeff + n * 16,
+      mb->itxm_add(mb->qcoeff + n * 4 * 4,
                    mb->block[0].dequant,
                    mb->dst.y_buffer + y_offset,
                    mb->dst.y_buffer + y_offset,
@@ -568,7 +621,7 @@
                    mb->dst.y_stride, mb->dst.y_stride,
                    mb->eobs[n]);
     } else {
-      vp9_ht_dequant_idct_add_c(tx_type, mb->qcoeff + n * 16,
+      vp9_ht_dequant_idct_add_c(tx_type, mb->qcoeff + n * 4 * 4,
                                 mb->block[0].dequant,
                                 mb->dst.y_buffer + y_offset,
                                 mb->dst.y_buffer + y_offset,
@@ -582,16 +635,18 @@
     const int x_idx = n % uv_size;
     const int y_idx = n / uv_size;
     const int uv_offset = (y_idx * 4) * mb->dst.uv_stride + (x_idx * 4);
-    mb->itxm_add(mb->qcoeff + u_qcoeff_offset + n * 16,
+    mb->itxm_add(mb->qcoeff + u_qcoeff_offset + n * 4 * 4,
         mb->block[16].dequant,
         mb->dst.u_buffer + uv_offset,
         mb->dst.u_buffer + uv_offset,
-        mb->dst.uv_stride, mb->dst.uv_stride, mb->eobs[u_eob_offset + n]);
-    mb->itxm_add(mb->qcoeff + v_qcoeff_offset + n * 16,
+        mb->dst.uv_stride, mb->dst.uv_stride,
+        mb->eobs[u_eob_offset + n]);
+    mb->itxm_add(mb->qcoeff + v_qcoeff_offset + n * 4 * 4,
         mb->block[20].dequant,
         mb->dst.v_buffer + uv_offset,
         mb->dst.v_buffer + uv_offset,
-        mb->dst.uv_stride, mb->dst.uv_stride, mb->eobs[v_eob_offset + n]);
+        mb->dst.uv_stride, mb->dst.uv_stride,
+        mb->eobs[v_eob_offset + n]);
   }
 }
 
@@ -657,39 +712,7 @@
             xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
         break;
       case TX_16X16:
-        for (n = 0; n < 16; n++) {
-          const int x_idx = n & 3, y_idx = n >> 2;
-          const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16;
-          const TX_TYPE tx_type = get_tx_type_16x16(xd,
-                                                    (y_idx * 16 + x_idx) * 4);
-          if (tx_type == DCT_DCT) {
-            vp9_dequant_idct_add_16x16(xd->qcoeff + n * 256,
-                xd->block[0].dequant,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
-          } else {
-            vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
-                xd->block[0].dequant,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
-          }
-        }
-        for (n = 0; n < 4; n++) {
-          const int x_idx = n & 1, y_idx = n >> 1;
-          const int uv_offset = y_idx * 16 * xd->dst.uv_stride + x_idx * 16;
-          vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + n * 256,
-              xd->block[16].dequant,
-              xd->dst.u_buffer + uv_offset,
-              xd->dst.u_buffer + uv_offset,
-              xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256 + n * 16]);
-          vp9_dequant_idct_add_16x16(xd->qcoeff + 4096 + 1024 + n * 256,
-              xd->block[20].dequant,
-              xd->dst.v_buffer + uv_offset,
-              xd->dst.v_buffer + uv_offset,
-              xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320 + n * 16]);
-        }
+        decode_sb_16x16(xd, 4);
         break;
       case TX_8X8:
         decode_sb_8x8(xd, 8);
@@ -707,7 +730,7 @@
 
 static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
                         BOOL_DECODER* const bc) {
-  int n, eobtotal;
+  int eobtotal;
   VP9_COMMON *const pc = &pbi->common;
   MODE_INFO *mi = xd->mode_info_context;
   const int mis = pc->mode_info_stride;
@@ -764,30 +787,7 @@
                                               xd->dst.uv_stride, xd);
         break;
       case TX_16X16:
-        for (n = 0; n < 4; n++) {
-          const int x_idx = n & 1, y_idx = n >> 1;
-          const int y_offset = y_idx * 16 * xd->dst.y_stride + x_idx * 16;
-          const TX_TYPE tx_type = get_tx_type_16x16(xd,
-                                                    (y_idx * 8 + x_idx) * 4);
-          if (tx_type == DCT_DCT) {
-            vp9_dequant_idct_add_16x16(
-                xd->qcoeff + n * 256, xd->block[0].dequant,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
-          } else {
-            vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff + n * 256,
-                xd->block[0].dequant,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_buffer + y_offset,
-                xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 16]);
-          }
-        }
-        vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
-                                              xd->block[16].dequant,
-                                              xd->dst.u_buffer,
-                                              xd->dst.v_buffer,
-                                              xd->dst.uv_stride, xd);
+        decode_sb_16x16(xd, 2);
         break;
       case TX_8X8:
         decode_sb_8x8(xd, 4);