shithub: libvpx

Download patch

ref: 62400028e22b3544eac09724afe45f3cd55766a1
parent: 96513c42b2e34ea096edd9eb649120362c71836f
parent: ca7e346669ce7d9a410edc19a828f9ca9637626d
author: John Koleszar <jkoleszar@google.com>
date: Tue Jul 26 06:22:42 EDT 2011

Merge remote branch 'internal/upstream' into HEAD

Conflicts:
	vp8/decoder/detokenize.c
	vp8/decoder/onyxd_int.h

Change-Id: Ib9b516b939358ac8bf694200a8425fdd62c8d149

--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -629,7 +629,7 @@
     case ${toolchain} in
         sparc-solaris-*)
             add_extralibs -lposix4
-            add_cflags "-DMUST_BE_ALIGNED"
+            disable fast_unaligned
             ;;
         *-solaris-*)
             add_extralibs -lposix4
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -10,6 +10,7 @@
 
 
 #include "vpx_ports/config.h"
+#include "vpx/vpx_integer.h"
 #include "recon.h"
 #include "subpixel.h"
 #include "blockd.h"
@@ -18,12 +19,6 @@
 #include "onyxc_int.h"
 #endif
 
-/* use this define on systems where unaligned int reads and writes are
- * not allowed, i.e. ARM architectures
- */
-/*#define MUST_BE_ALIGNED*/
-
-
 static const int bbb[4] = {0, 2, 8, 10};
 
 
@@ -39,7 +34,7 @@
 
     for (r = 0; r < 16; r++)
     {
-#ifdef MUST_BE_ALIGNED
+#if !(CONFIG_FAST_UNALIGNED)
         dst[0] = src[0];
         dst[1] = src[1];
         dst[2] = src[2];
@@ -58,10 +53,10 @@
         dst[15] = src[15];
 
 #else
-        ((int *)dst)[0] = ((int *)src)[0] ;
-        ((int *)dst)[1] = ((int *)src)[1] ;
-        ((int *)dst)[2] = ((int *)src)[2] ;
-        ((int *)dst)[3] = ((int *)src)[3] ;
+        ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
+        ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
+        ((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
+        ((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
 
 #endif
         src += src_stride;
@@ -81,7 +76,7 @@
 
     for (r = 0; r < 8; r++)
     {
-#ifdef MUST_BE_ALIGNED
+#if !(CONFIG_FAST_UNALIGNED)
         dst[0] = src[0];
         dst[1] = src[1];
         dst[2] = src[2];
@@ -91,8 +86,8 @@
         dst[6] = src[6];
         dst[7] = src[7];
 #else
-        ((int *)dst)[0] = ((int *)src)[0] ;
-        ((int *)dst)[1] = ((int *)src)[1] ;
+        ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
+        ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
 #endif
         src += src_stride;
         dst += dst_stride;
@@ -111,7 +106,7 @@
 
     for (r = 0; r < 4; r++)
     {
-#ifdef MUST_BE_ALIGNED
+#if !(CONFIG_FAST_UNALIGNED)
         dst[0] = src[0];
         dst[1] = src[1];
         dst[2] = src[2];
@@ -121,8 +116,8 @@
         dst[6] = src[6];
         dst[7] = src[7];
 #else
-        ((int *)dst)[0] = ((int *)src)[0] ;
-        ((int *)dst)[1] = ((int *)src)[1] ;
+        ((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
+        ((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
 #endif
         src += src_stride;
         dst += dst_stride;
@@ -154,13 +149,13 @@
 
         for (r = 0; r < 4; r++)
         {
-#ifdef MUST_BE_ALIGNED
+#if !(CONFIG_FAST_UNALIGNED)
             pred_ptr[0]  = ptr[0];
             pred_ptr[1]  = ptr[1];
             pred_ptr[2]  = ptr[2];
             pred_ptr[3]  = ptr[3];
 #else
-            *(int *)pred_ptr = *(int *)ptr ;
+            *(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
 #endif
             pred_ptr     += pitch;
             ptr         += d->pre_stride;
--- a/vp8/decoder/asm_dec_offsets.c
+++ b/vp8/decoder/asm_dec_offsets.c
@@ -17,7 +17,6 @@
 DEFINE(detok_scan,                              offsetof(DETOK, scan));
 DEFINE(detok_ptr_block2leftabove,               offsetof(DETOK, ptr_block2leftabove));
 DEFINE(detok_coef_tree_ptr,                     offsetof(DETOK, vp8_coef_tree_ptr));
-DEFINE(detok_teb_base_ptr,                      offsetof(DETOK, teb_base_ptr));
 DEFINE(detok_norm_ptr,                          offsetof(DETOK, norm_ptr));
 DEFINE(detok_ptr_coef_bands_x,                  offsetof(DETOK, ptr_coef_bands_x));
 
@@ -34,9 +33,6 @@
 DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));
 DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));
 DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
-
-DEFINE(tokenextrabits_min_val,                  offsetof(TOKENEXTRABITS, min_val));
-DEFINE(tokenextrabits_length,                   offsetof(TOKENEXTRABITS, Length));
 
 END
 
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -50,47 +50,37 @@
 #define CAT_THREE_CONTEXT_NODE      9
 #define CAT_FIVE_CONTEXT_NODE       10
 
-/*
-//the definition is put in "onyxd_int.h"
-typedef struct
-{
-    INT16         min_val;
-    INT16         Length;
-    UINT8 Probs[14];
-} TOKENEXTRABITS;
-*/
+#define CAT1_MIN_VAL    5
+#define CAT2_MIN_VAL    7
+#define CAT3_MIN_VAL   11
+#define CAT4_MIN_VAL   19
+#define CAT5_MIN_VAL   35
+#define CAT6_MIN_VAL   67
+#define CAT1_PROB0    159
+#define CAT2_PROB0    145
+#define CAT2_PROB1    165
+
+#define CAT3_PROB0 140
+#define CAT3_PROB1 148
+#define CAT3_PROB2 173
+
+#define CAT4_PROB0 135
+#define CAT4_PROB1 140
+#define CAT4_PROB2 155
+#define CAT4_PROB3 176
+
+#define CAT5_PROB0 130
+#define CAT5_PROB1 134
+#define CAT5_PROB2 141
+#define CAT5_PROB3 157
+#define CAT5_PROB4 180
+
 #if CONFIG_EXTEND_QRANGE
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* ZERO_TOKEN */
-    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* ONE_TOKEN */
-    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* TWO_TOKEN */
-    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* THREE_TOKEN */
-    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* FOUR_TOKEN */
-    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* DCT_VAL_CATEGORY1 */
-    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY2 */
-    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY3 */
-    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY4 */
-    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY5 */
-    { 67, 12, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 249, 252, 254, 254,  0   } }, /* DCT_VAL_CATEGORY6 */
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /*  EOB TOKEN */
-};
+static const unsigned char cat6_prob[14] =
+{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 249, 252, 254, 254, 0 };
 #else
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* ZERO_TOKEN */
-    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* ONE_TOKEN */
-    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* TWO_TOKEN */
-    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* THREE_TOKEN */
-    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* FOUR_TOKEN */
-    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* DCT_VAL_CATEGORY1 */
-    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY2 */
-    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY3 */
-    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY4 */
-    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY5 */
-    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, /* DCT_VAL_CATEGORY6 */
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /*  EOB TOKEN */
-};
+static const unsigned char cat6_prob[12] =
+{ 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 };
 #endif
 
 
@@ -97,7 +87,8 @@
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 {
     /* Clear entropy contexts for Y2 blocks */
-    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
         vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -227,7 +218,7 @@
         qcoeff_ptr [ scan[c] ] = (INT16) v; \
         ++c; \
         goto DO_WHILE; }\
-    qcoeff_ptr [ scan[15] ] = (INT16) v; \
+    qcoeff_ptr [ 15 ] = (INT16) v; \
     goto BLOCK_FINISHED;
 
 #if CONFIG_T8X8
@@ -251,8 +242,8 @@
     goto BLOCK_FINISHED_8x8;
 #endif
 
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
-    split = 1 +  (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
+#define DECODE_EXTRABIT_AND_ADJUST_VAL(prob, bits_count)\
+    split = 1 +  (((range-1) * prob) >> 8); \
     bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
     FILL \
     if(value >= bigsplit)\
@@ -658,7 +649,8 @@
     scan = vp8_default_zig_zag1d;
     qcoeff_ptr = &x->qcoeff[0];
 
-    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         i = 24;
         stop = 24;
@@ -694,16 +686,21 @@
 CHECK_0_:
     DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
     DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
-    bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
+    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE],
+                              LOW_VAL_CONTEXT_NODE_0_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE],
+                              HIGH_LOW_CONTEXT_NODE_0_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE],
+                              CAT_THREEFOUR_CONTEXT_NODE_0_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE],
+                              CAT_FIVE_CONTEXT_NODE_0_);
 
+    val = CAT6_MIN_VAL;
+    /* Start at the top extra-bit index (the old table's Length), not the array size. */
+    bits_count = CONFIG_EXTEND_QRANGE?12:10;
     do
     {
-        DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
+        DECODE_EXTRABIT_AND_ADJUST_VAL(cat6_prob[bits_count], bits_count);
         bits_count -- ;
     }
     while (bits_count >= 0);
@@ -711,41 +708,43 @@
     DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
 
 CAT_FIVE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
+    val = CAT5_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB4, 4);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB3, 3);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT5_PROB0, 0);
     DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
 
 CAT_THREEFOUR_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE],
+                              CAT_THREE_CONTEXT_NODE_0_);
+    val = CAT4_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB3, 3);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT4_PROB0, 0);
     DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
 
 CAT_THREE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
+    val = CAT3_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB2, 2);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT3_PROB0, 0);
     DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
 
 HIGH_LOW_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
+    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE],
+                              CAT_ONE_CONTEXT_NODE_0_);
 
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
+    val = CAT2_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB1, 1);
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT2_PROB0, 0);
     DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
 
 CAT_ONE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
+    val = CAT1_MIN_VAL;
+    DECODE_EXTRABIT_AND_ADJUST_VAL(CAT1_PROB0, 0);
     DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
 
 LOW_VAL_CONTEXT_NODE_0_:
@@ -770,7 +769,7 @@
         goto DO_WHILE;
     }
 
-    qcoeff_ptr [ scan[15] ] = (INT16) v;
+    qcoeff_ptr [ 15 ] = (INT16) v;
 BLOCK_FINISHED:
     *a = *l = ((eobs[i] = c) != !type);   /* any nonzero data? */
     eobtotal += c;
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -44,23 +44,8 @@
     int size;
 } DATARATE;
 
-#if CONFIG_EXTEND_QRANGE
 typedef struct
 {
-    INT16        min_val;
-    INT16        Length;
-    UINT8 Probs[14];
-} TOKENEXTRABITS;
-#else
-typedef struct
-{
-    INT16        min_val;
-    INT16        Length;
-    UINT8 Probs[12];
-} TOKENEXTRABITS;
-#endif
-typedef struct
-{
     int const *scan;
 #if CONFIG_T8X8
     int const *scan_8x8;
@@ -67,7 +52,6 @@
 #endif
     UINT8 const *ptr_block2leftabove;
     vp8_tree_index const *vp8_coef_tree_ptr;
-    TOKENEXTRABITS const *teb_base_ptr;
     unsigned char *norm_ptr;
     UINT8 *ptr_coef_bands_x;
 #if CONFIG_T8X8
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -43,7 +43,7 @@
     %define     ret_var     r11
     %define     result_ptr  [rsp+xmm_stack_space+8+4*8]
     %define     max_err     [rsp+xmm_stack_space+8+4*8]
-    %define     height      [rsp+xmm_stack_space+8+4*8]
+    %define     height      dword ptr [rsp+xmm_stack_space+8+4*8]
   %else
     %define     src_ptr     rdi
     %define     src_stride  rsi