shithub: libvpx

Download patch

ref: ca29f6a7c47471621da52e96cb2d5c5acb81b41b
parent: 738a79191725f5af4a08451312f5a48fb386bc4b
parent: 6f5189c044cf52171822f6df1ad6ae4a3bcaf2c4
author: John Koleszar <jkoleszar@google.com>
date: Thu Mar 3 03:59:34 EST 2011

Merge remote branch 'internal/upstream' into HEAD

Conflicts:
	vp8/vp8_cx_iface.c

Change-Id: Iecfd4532ab1c722d10ecce8a5ec473e96093cf3b

--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -624,6 +624,10 @@
 
     # Handle Solaris variants. Solaris 10 needs -lposix4
     case ${toolchain} in
+        sparc-solaris-*)
+            add_extralibs -lposix4
+            add_cflags "-DMUST_BE_ALIGNED"
+            ;;
         *-solaris-*)
             add_extralibs -lposix4
             ;;
--- a/docs.mk
+++ b/docs.mk
@@ -34,7 +34,8 @@
 
 EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc
 
-doxyfile: libs.doxy_template libs.doxy examples.doxy
+doxyfile: $(if $(findstring examples, $(ALL_TARGETS)),examples.doxy)
+doxyfile: libs.doxy_template libs.doxy
 	@echo "    [CREATE] $@"
 	@cat $^ > $@
 	@echo "STRIP_FROM_PATH += $(SRC_PATH_BARE) $(BUILD_ROOT)" >> $@
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -46,6 +46,9 @@
         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_armv6;
+        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_armv6;
+        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_armv6;
+        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_armv6;
 
         /*cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
         cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
--- a/vp8/encoder/arm/variance_arm.c
+++ b/vp8/encoder/arm/variance_arm.c
@@ -43,7 +43,41 @@
                                    dst_pixels_per_line, sse);
 }
 
-#endif
+/* Thin wrapper: forwards to vp8_sub_pixel_variance16x16_armv6 with the offset
+ * pair fixed at (4, 0) -- presumably the horizontal half-pixel case; confirm. */
+unsigned int vp8_variance_halfpixvar16x16_h_armv6(
+    const unsigned char *src_ptr, int source_stride,
+    const unsigned char *ref_ptr, int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_armv6(
+               src_ptr, source_stride, 4, 0, ref_ptr, recon_stride, sse);
+}
+
+/* Thin wrapper: forwards to vp8_sub_pixel_variance16x16_armv6 with the offset
+ * pair fixed at (0, 4) -- presumably the vertical half-pixel case; confirm. */
+unsigned int vp8_variance_halfpixvar16x16_v_armv6(
+    const unsigned char *src_ptr, int source_stride,
+    const unsigned char *ref_ptr, int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_armv6(
+               src_ptr, source_stride, 0, 4, ref_ptr, recon_stride, sse);
+}
+
+/* Thin wrapper: forwards to vp8_sub_pixel_variance16x16_armv6 with the offset
+ * pair fixed at (4, 4) -- presumably the diagonal half-pixel case; confirm. */
+unsigned int vp8_variance_halfpixvar16x16_hv_armv6(
+    const unsigned char *src_ptr, int source_stride,
+    const unsigned char *ref_ptr, int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_armv6(
+               src_ptr, source_stride, 4, 4, ref_ptr, recon_stride, sse);
+}
+
+#endif /* HAVE_ARMV6 */
+
 
 #if HAVE_ARMV7
 
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -17,6 +17,9 @@
 extern prototype_sad(vp8_sad16x16_armv6);
 extern prototype_variance(vp8_variance16x16_armv6);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 
@@ -29,9 +32,19 @@
 #undef  vp8_variance_var16x16
 #define vp8_variance_var16x16 vp8_variance16x16_armv6
 
+#undef  vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
+
+#undef  vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_armv6
+
+#undef  vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_armv6
+
 #endif /* !CONFIG_RUNTIME_CPU_DETECT */
 
 #endif /* HAVE_ARMV6 */
+
 
 #if HAVE_ARMV7
 extern prototype_sad(vp8_sad4x4_neon);
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -95,8 +95,9 @@
     cpi->rtcd.search.full_search             = vp8_full_search_sad;
 #endif
     cpi->rtcd.search.diamond_search          = vp8_diamond_search_sad;
-
+#if !(CONFIG_REALTIME_ONLY)
     cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_c;
+#endif
 #endif
 
     // Pure C:
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -330,35 +330,31 @@
 
 void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 {
-        vpx_free(cpi->tplist);
+    vpx_free(cpi->tplist);
     cpi->tplist = NULL;
 
     // Delete last frame MV storage buffers
-        vpx_free(cpi->lfmv);
-
+    vpx_free(cpi->lfmv);
     cpi->lfmv = 0;
 
-        vpx_free(cpi->lf_ref_frame_sign_bias);
-
+    vpx_free(cpi->lf_ref_frame_sign_bias);
     cpi->lf_ref_frame_sign_bias = 0;
 
-        vpx_free(cpi->lf_ref_frame);
-
+    vpx_free(cpi->lf_ref_frame);
     cpi->lf_ref_frame = 0;
 
     // Delete sementation map
-        vpx_free(cpi->segmentation_map);
-
+    vpx_free(cpi->segmentation_map);
     cpi->segmentation_map = 0;
 
-        vpx_free(cpi->active_map);
-
+    vpx_free(cpi->active_map);
     cpi->active_map = 0;
 
+#if !(CONFIG_REALTIME_ONLY)
     // Delete first pass motion map
-        vpx_free(cpi->fp_motion_map);
-
+    vpx_free(cpi->fp_motion_map);
     cpi->fp_motion_map = 0;
+#endif
 
     vp8_de_alloc_frame_buffers(&cpi->common);
 
@@ -380,21 +376,17 @@
     cpi->tok = 0;
 
     // Structure used to monitor GF usage
-        vpx_free(cpi->gf_active_flags);
-
+    vpx_free(cpi->gf_active_flags);
     cpi->gf_active_flags = 0;
 
-        vpx_free(cpi->mb.pip);
-
+    vpx_free(cpi->mb.pip);
     cpi->mb.pip = 0;
 
 #if !(CONFIG_REALTIME_ONLY)
-        vpx_free(cpi->total_stats);
-
+    vpx_free(cpi->total_stats);
     cpi->total_stats = 0;
 
-        vpx_free(cpi->this_frame_stats);
-
+    vpx_free(cpi->this_frame_stats);
     cpi->this_frame_stats = 0;
 #endif
 }
@@ -2227,9 +2219,11 @@
     vpx_memset(cpi->active_map , 1, (cpi->common.mb_rows * cpi->common.mb_cols));
     cpi->active_map_enabled = 0;
 
+#if !(CONFIG_REALTIME_ONLY)
     // Create the first pass motion map structure and set to 0
     // Allocate space for maximum of 15 buffers
     CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
+#endif
 
 #if 0
     // Experimental code for lagged and one pass
@@ -3195,8 +3189,11 @@
     // Update the Golden frame reconstruction buffer if signalled and the GF usage counts.
     if (cm->refresh_golden_frame)
     {
-        // Update the golden frame buffer
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+        if (cm->frame_type != KEY_FRAME)
+        {
+            // Update the golden frame buffer
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+        }
 
         // Select an interval before next GF
         if (!cpi->auto_gold)
@@ -4833,16 +4830,19 @@
 
     if (cpi->oxcf.error_resilient_mode)
     {
-        // Is this an alternate reference update
-        if (cpi->common.refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
+        if (cm->frame_type != KEY_FRAME)
+        {
+            // Is this an alternate reference update
+            if (cm->refresh_alt_ref_frame)
+                vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
 
-        if (cpi->common.refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+            if (cm->refresh_golden_frame)
+                vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
+        }
     }
     else
     {
-        if (cpi->oxcf.play_alternate && cpi->common.refresh_alt_ref_frame && (cpi->common.frame_type != KEY_FRAME))
+        if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME))
             // Update the alternate reference frame and stats as appropriate.
             update_alt_ref_frame_and_stats(cpi);
         else
@@ -5317,23 +5317,6 @@
     if (cpi->compressor_speed == 2)
     {
         vp8_check_gf_quality(cpi);
-    }
-
-    if (!cpi)
-    {
-#if HAVE_ARMV7
-#if CONFIG_RUNTIME_CPU_DETECT
-        if (cm->rtcd.flags & HAS_NEON)
-#endif
-        {
-            vp8_pop_neon(store_reg);
-        }
-#endif
-        return 0;
-    }
-
-    if (cpi->compressor_speed == 2)
-    {
         vpx_usec_timer_start(&tsctimer);
         vpx_usec_timer_start(&ticktimer);
     }
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -51,7 +51,9 @@
 #define MV_ZBIN_BOOST        4
 #define ZBIN_OQ_MAX 192
 
+#if !(CONFIG_REALTIME_ONLY)
 #define VP8_TEMPORAL_ALT_REF 1
+#endif
 
 typedef struct
 {
@@ -496,9 +498,11 @@
     FIRSTPASS_STATS *stats_in, *stats_in_end;
     struct vpx_codec_pkt_list  *output_pkt_list;
     int                          first_pass_done;
-    unsigned char *fp_motion_map;
 
+#if !(CONFIG_REALTIME_ONLY)
+    unsigned char *fp_motion_map;
     unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
+#endif
 
 #if 0
     // Experimental code for lagged and one pass
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -85,10 +85,9 @@
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
+    push rbx
     push rsi
     push rdi
-    sub         rsp, 16
     ; end prolog
 
         mov         rsi,            arg(0) ;[src_ptr]
@@ -97,6 +96,29 @@
         movsxd      rax,            DWORD PTR arg(1) ;[source_stride]
         movsxd      rdx,            DWORD PTR arg(3) ;[recon_stride]
 
+        ; Prefetch data
+        lea             rcx,    [rax+rax*2]
+        prefetcht0      [rsi]
+        prefetcht0      [rsi+rax]
+        prefetcht0      [rsi+rax*2]
+        prefetcht0      [rsi+rcx]
+        lea             rbx,    [rsi+rax*4]
+        prefetcht0      [rbx]
+        prefetcht0      [rbx+rax]
+        prefetcht0      [rbx+rax*2]
+        prefetcht0      [rbx+rcx]
+
+        lea             rcx,    [rdx+rdx*2]
+        prefetcht0      [rdi]
+        prefetcht0      [rdi+rdx]
+        prefetcht0      [rdi+rdx*2]
+        prefetcht0      [rdi+rcx]
+        lea             rbx,    [rdi+rdx*4]
+        prefetcht0      [rbx]
+        prefetcht0      [rbx+rdx]
+        prefetcht0      [rbx+rdx*2]
+        prefetcht0      [rbx+rcx]
+
         pxor        xmm0,           xmm0                        ; clear xmm0 for unpack
         pxor        xmm7,           xmm7                        ; clear xmm7 for accumulating diffs
 
@@ -107,6 +129,9 @@
         movdqu      xmm1,           XMMWORD PTR [rsi]
         movdqu      xmm2,           XMMWORD PTR [rdi]
 
+        prefetcht0      [rsi+rax*8]
+        prefetcht0      [rdi+rdx*8]
+
         movdqa      xmm3,           xmm1
         movdqa      xmm4,           xmm2
 
@@ -178,10 +203,9 @@
 
 
     ; begin epilog
-    add rsp, 16
     pop rdi
     pop rsi
-    RESTORE_GOT
+    pop rbx
     UNSHADOW_ARGS
     pop         rbp
     ret
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -301,7 +301,9 @@
 #endif
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
 
+#if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_sse2;
+#endif
     }
 #endif
 
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -498,58 +498,68 @@
     {
         priv = calloc(1, sizeof(struct vpx_codec_alg_priv));
 
-        if (priv)
+        if (!priv)
         {
-            ctx->priv = &priv->base;
-            ctx->priv->sz = sizeof(*ctx->priv);
-            ctx->priv->iface = ctx->iface;
-            ctx->priv->alg_priv = priv;
-            ctx->priv->init_flags = ctx->init_flags;
+            return VPX_CODEC_MEM_ERROR;
+        }
 
-            if (ctx->config.enc)
-            {
-                /* Update the reference to the config structure to an
-                 * internal copy.
-                 */
-                ctx->priv->alg_priv->cfg = *ctx->config.enc;
-                ctx->config.enc = &ctx->priv->alg_priv->cfg;
-            }
+        ctx->priv = &priv->base;
+        ctx->priv->sz = sizeof(*ctx->priv);
+        ctx->priv->iface = ctx->iface;
+        ctx->priv->alg_priv = priv;
+        ctx->priv->init_flags = ctx->init_flags;
 
-            cfg =  &ctx->priv->alg_priv->cfg;
-
-            /* Select the extra vp6 configuration table based on the current
-             * usage value. If the current usage value isn't found, use the
-             * values for usage case 0.
+        if (ctx->config.enc)
+        {
+            /* Update the reference to the config structure to an
+             * internal copy.
              */
-            for (i = 0;
-                 extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
-                 i++);
+            ctx->priv->alg_priv->cfg = *ctx->config.enc;
+            ctx->config.enc = &ctx->priv->alg_priv->cfg;
+        }
 
-            priv->vp8_cfg = extracfg_map[i].cfg;
-            priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
+        cfg =  &ctx->priv->alg_priv->cfg;
+
+        /* Select the extra vp8 configuration table based on the current
+         * usage value. If the current usage value isn't found, use the
+         * values for usage case 0.
+         */
+        for (i = 0;
+             extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
+             i++);
+
+        priv->vp8_cfg = extracfg_map[i].cfg;
+        priv->vp8_cfg.pkt_list = &priv->pkt_list.head;
             priv->vp8_cfg.experimental = experimental;
 
-            priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
+        priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2;
 
-            if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
+        if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096;
 
-            priv->cx_data = malloc(priv->cx_data_sz);
-            priv->deprecated_mode = NO_MODE_SET;
+        priv->cx_data = malloc(priv->cx_data_sz);
 
-            vp8_initialize();
+        if (!priv->cx_data)
+        {
+            return VPX_CODEC_MEM_ERROR;
+        }
 
-            res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
+        priv->deprecated_mode = NO_MODE_SET;
 
-            if (!res)
-            {
-                set_vp8e_config(&ctx->priv->alg_priv->oxcf, ctx->priv->alg_priv->cfg, ctx->priv->alg_priv->vp8_cfg);
-                optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf);
+        vp8_initialize();
 
-                if (!optr)
-                    res = VPX_CODEC_MEM_ERROR;
-                else
-                    ctx->priv->alg_priv->cpi = optr;
-            }
+        res = validate_config(priv, &priv->cfg, &priv->vp8_cfg);
+
+        if (!res)
+        {
+            set_vp8e_config(&ctx->priv->alg_priv->oxcf,
+                             ctx->priv->alg_priv->cfg,
+                             ctx->priv->alg_priv->vp8_cfg);
+            optr = vp8_create_compressor(&ctx->priv->alg_priv->oxcf);
+
+            if (!optr)
+                res = VPX_CODEC_MEM_ERROR;
+            else
+                ctx->priv->alg_priv->cpi = optr;
         }
     }
 
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -85,6 +85,7 @@
 
 ifeq ($(CONFIG_REALTIME_ONLY),yes)
 VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
+VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
 endif
 
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
@@ -113,5 +114,10 @@
 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
+
+ifeq ($(CONFIG_REALTIME_ONLY),yes)
+VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
+endif
+
 
 VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))