shithub: libvpx

Download patch

ref: c4d1ab573ae8120e6020c9e986b4cdc6b9f5b802
parent: 40047bef5d116eda350a72097554c9630dad66e9
author: Dmitry Kovalev <dkovalev@google.com>
date: Wed Oct 2 07:48:08 EDT 2013

Removing memset calls inside idct/iht functions.

The coefficient buffer is now cleared in decode_block instead, right after the inverse transform is applied.

Change-Id: I8e944194668c830de08271c8fb6e413251c201d8

--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -92,32 +92,44 @@
   int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block);
   const int stride = pd->dst.stride;
   const int eob = pd->eobs[block];
-  const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
-                                                       block);
-  uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block,
-                                                 pd->dst.buf, stride);
-  switch (tx_size) {
-    case TX_4X4: {
-      const TX_TYPE tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block);
-      if (tx_type == DCT_DCT)
-        xd->itxm_add(qcoeff, dst, stride, eob);
+  if (eob > 0) {
+    TX_TYPE tx_type;
+    const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size,
+                                                         block);
+    uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block,
+                                                   pd->dst.buf, stride);
+    switch (tx_size) {
+      case TX_4X4:
+        tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block);
+        if (tx_type == DCT_DCT)
+          xd->itxm_add(qcoeff, dst, stride, eob);
+        else
+          vp9_iht_add_c(tx_type, qcoeff, dst, stride, eob);
+        break;
+      case TX_8X8:
+        tx_type = get_tx_type_8x8(pd->plane_type, xd);
+        vp9_iht_add_8x8_c(tx_type, qcoeff, dst, stride, eob);
+        break;
+      case TX_16X16:
+        tx_type = get_tx_type_16x16(pd->plane_type, xd);
+        vp9_iht_add_16x16_c(tx_type, qcoeff, dst, stride, eob);
+        break;
+      case TX_32X32:
+        tx_type = DCT_DCT;
+        vp9_idct_add_32x32(qcoeff, dst, stride, eob);
+        break;
+      default:
+        assert(!"Invalid transform size");
+    }
+
+    if (eob == 1) {
+      *((int32_t *)qcoeff) = 0;
+    } else {
+      if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+        vpx_memset(qcoeff, 0, 4 * (4 << tx_size) * sizeof(qcoeff[0]));
       else
-        vp9_iht_add_c(tx_type, qcoeff, dst, stride, eob);
-      break;
+        vpx_memset(qcoeff, 0, (16 << (tx_size << 1)) * sizeof(qcoeff[0]));
     }
-    case TX_8X8:
-      vp9_iht_add_8x8_c(get_tx_type_8x8(pd->plane_type, xd), qcoeff, dst,
-                        stride, eob);
-      break;
-    case TX_16X16:
-      vp9_iht_add_16x16_c(get_tx_type_16x16(pd->plane_type, xd), qcoeff, dst,
-                          stride, eob);
-      break;
-    case TX_32X32:
-      vp9_idct_add_32x32(qcoeff, dst, stride, eob);
-      break;
-    default:
-      assert(!"Invalid transform size");
   }
 }
 
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -14,12 +14,10 @@
 
 void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
                    int eob) {
-  if (tx_type == DCT_DCT) {
+  if (tx_type == DCT_DCT)
     vp9_idct_add(input, dest, stride, eob);
-  } else {
+  else
     vp9_short_iht4x4_add(input, dest, stride, tx_type);
-    vpx_memset(input, 0, 32);
-  }
 }
 
 void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
@@ -29,30 +27,23 @@
   } else {
     if (eob > 0) {
       vp9_short_iht8x8_add(input, dest, stride, tx_type);
-      vpx_memset(input, 0, 128);
     }
   }
 }
 
 void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) {
-  if (eob > 1) {
+  if (eob > 1)
     vp9_short_idct4x4_add(input, dest, stride);
-    vpx_memset(input, 0, 32);
-  } else {
+  else
     vp9_short_idct4x4_1_add(input, dest, stride);
-    ((int *)input)[0] = 0;
-  }
 }
 
 void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride,
                              int eob) {
-  if (eob > 1) {
+  if (eob > 1)
     vp9_short_iwalsh4x4_add(input, dest, stride);
-    vpx_memset(input, 0, 32);
-  } else {
+  else
     vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
-    ((int *)input)[0] = 0;
-  }
 }
 
 void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) {
@@ -64,17 +55,13 @@
   // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
   // Combine that with code here.
   if (eob) {
-    if (eob == 1) {
+    if (eob == 1)
       // DC only DCT coefficient
       vp9_short_idct8x8_1_add(input, dest, stride);
-      input[0] = 0;
-    } else if (eob <= 10) {
+    else if (eob <= 10)
       vp9_short_idct8x8_10_add(input, dest, stride);
-      vpx_memset(input, 0, 128);
-    } else {
+    else
       vp9_short_idct8x8_add(input, dest, stride);
-      vpx_memset(input, 0, 128);
-    }
   }
 }
 
@@ -85,7 +72,6 @@
   } else {
     if (eob > 0) {
       vp9_short_iht16x16_add(input, dest, stride, tx_type);
-      vpx_memset(input, 0, 512);
     }
   }
 }
@@ -94,29 +80,22 @@
   /* The calculation can be simplified if there are not many non-zero dct
    * coefficients. Use eobs to separate different cases. */
   if (eob) {
-    if (eob == 1) {
+    if (eob == 1)
       /* DC only DCT coefficient. */
       vp9_short_idct16x16_1_add(input, dest, stride);
-      input[0] = 0;
-    } else if (eob <= 10) {
+    else if (eob <= 10)
       vp9_short_idct16x16_10_add(input, dest, stride);
-      vpx_memset(input, 0, 512);
-    } else {
+    else
       vp9_short_idct16x16_add(input, dest, stride);
-      vpx_memset(input, 0, 512);
-    }
   }
 }
 
 void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) {
   if (eob) {
-    if (eob == 1) {
+    if (eob == 1)
       vp9_short_idct32x32_1_add(input, dest, stride);
-      input[0] = 0;
-    } else {
+    else
       vp9_short_idct32x32_add(input, dest, stride);
-      vpx_memset(input, 0, 2048);
-    }
   }
 }