ref: b210797a6ada5816a56cacb0b4106a5bf525314c
parent: 23c4cbdfbae53661877d449072241651b788d016
parent: 8c48c943e7c082b4330061e74ce85547253de301
author: John Koleszar <jkoleszar@google.com>
date: Mon Mar 14 20:05:07 EDT 2011
Merge remote branch 'internal/upstream' into HEAD
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -918,15 +918,23 @@
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
strncpy(name, ptr, 8);
//log_msg("COFF: Parsing symbol %s\n",name);
- /* +1 to avoid printing leading underscore */
- printf("%-40s EQU ", name + 1);
+ /* The 64bit Windows compiler doesn't prefix with an _.
+ * Check what's there, and bump if necessary
+ */
+ if (name[0] == '_')
+ printf("%-40s EQU ", name + 1);
+ else
+ printf("%-40s EQU ", name);
}
else
{
//log_msg("COFF: Parsing symbol %s\n",
// buf + strtab_ptr + get_le32(ptr+4));
- /* +1 to avoid printing leading underscore */
- printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4) + 1);
+ if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
+ printf("%-40s EQU ",
+ buf + strtab_ptr + get_le32(ptr + 4) + 1);
+ else
+ printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
}
if (!(strcmp(sectionlist[section-1], ".bss")))
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -296,18 +296,6 @@
}
}
- if(pbi->common.filter_level)
- {
- /*update loopfilter info*/
- Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
- filter_level = pbi->mt_baseline_filter_level[Segment];
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
- }
-
/* Distance of Mb to the various image edges.
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
*/
@@ -362,7 +350,16 @@
}
}
- /* loopfilter on this macroblock. */
+ /* update loopfilter info */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Start from the per-segment baseline filter level selected above,
+ * then apply any context driven MB level adjustment to obtain the
+ * loop filter level used for this macroblock.
+ */
+ filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
+
+ /* loopfilter on this macroblock. */
if (filter_level)
{
if (mb_col > 0)
@@ -778,18 +775,6 @@
}
}
- if(pbi->common.filter_level)
- {
- /* update loopfilter info */
- Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
- filter_level = pbi->mt_baseline_filter_level[Segment];
- /* Distance of Mb to the various image edges.
- * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
- * Apply any context driven MB level adjustment
- */
- filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
- }
-
/* Distance of Mb to the various image edges.
* These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
*/
@@ -852,6 +837,15 @@
}
}
}
+
+ /* update loopfilter info */
+ Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+ filter_level = pbi->mt_baseline_filter_level[Segment];
+ /* Start from the per-segment baseline filter level selected above,
+ * then apply any context driven MB level adjustment to obtain the
+ * loop filter level used for this macroblock.
+ */
+ filter_level = vp8_adjust_mb_lf_value(xd, filter_level);
/* loopfilter on this macroblock. */
if (filter_level)
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -50,8 +50,8 @@
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_armv6;
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6;
- /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
- cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
+ cpi->rtcd.variance.mse16x16 = vp8_mse16x16_armv6;
+ /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
/*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c;
cpi->rtcd.variance.get8x8var = vp8_get8x8var_c;
--- /dev/null
+++ b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
@@ -1,0 +1,133 @@
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vp8_mse16x16_armv6|
+
+ ARM
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 unsigned char *src_ptr
+; r1 int source_stride
+; r2 unsigned char *ref_ptr
+; r3 int recon_stride
+; stack unsigned int *sse
+;
+;note: Based on vp8_variance16x16_armv6. This function never uses the sum, so
+; the usad8 partial-sum instructions kept in the loop are dead and removable.
+
+|vp8_mse16x16_armv6| PROC
+
+ push {r4-r9, lr} ; save r4-r9 and lr (7 registers = 28 bytes)
+ mov r12, #16 ; set loop counter to 16 (=block height)
+
+ mov r4, #0 ; initialize sse = 0
+
+loop
+ ; 1st 4 pixels
+ ldr r5, [r0, #0x0] ; load 4 src pixels
+ ldr r6, [r2, #0x0] ; load 4 ref pixels
+
+ mov lr, #0 ; constant zero
+
+ usub8 r8, r5, r6 ; per-byte src - ref (sets GE flags per byte)
+ sel r7, r8, lr ; keep bytes where src >= ref, zero elsewhere
+ usub8 r9, r6, r5 ; per-byte ref - src (sets GE flags per byte)
+ sel r8, r9, lr ; keep bytes where ref >= src, zero elsewhere
+
+ ; partial sums: results are never read (r5/r6 are overwritten below)
+ usad8 r5, r7, lr ; sum of positive differences (unused)
+ usad8 r6, r8, lr ; sum of negative differences (unused)
+ orr r8, r8, r7 ; absolute differences of all 4 pixels
+
+ ldr r5, [r0, #0x4] ; load next 4 src pixels
+
+ ; calculate sse
+ uxtb16 r6, r8 ; bytes 0 and 2 (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; bytes 1 and 3 (other two pixels) to halfwords
+ smlad r4, r6, r6, r4 ; sse += square of each halfword in r6
+
+ ; 2nd 4 pixels
+ ldr r6, [r2, #0x4] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; sse += square of each halfword in r7
+
+ usub8 r8, r5, r6 ; per-byte src - ref (sets GE flags per byte)
+ sel r7, r8, lr ; keep bytes where src >= ref, zero elsewhere
+ usub8 r9, r6, r5 ; per-byte ref - src (sets GE flags per byte)
+ sel r8, r9, lr ; keep bytes where ref >= src, zero elsewhere
+
+ ; partial sums: results are never read (r5/r6 are overwritten below)
+ usad8 r5, r7, lr ; sum of positive differences (unused)
+ usad8 r6, r8, lr ; sum of negative differences (unused)
+ orr r8, r8, r7 ; absolute differences of all 4 pixels
+ ldr r5, [r0, #0x8] ; load next 4 src pixels
+ ; calculate sse
+ uxtb16 r6, r8 ; bytes 0 and 2 (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; bytes 1 and 3 (other two pixels) to halfwords
+ smlad r4, r6, r6, r4 ; sse += square of each halfword in r6
+
+ ; 3rd 4 pixels
+ ldr r6, [r2, #0x8] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; sse += square of each halfword in r7
+
+ usub8 r8, r5, r6 ; per-byte src - ref (sets GE flags per byte)
+ sel r7, r8, lr ; keep bytes where src >= ref, zero elsewhere
+ usub8 r9, r6, r5 ; per-byte ref - src (sets GE flags per byte)
+ sel r8, r9, lr ; keep bytes where ref >= src, zero elsewhere
+
+ ; partial sums: results are never read (r5/r6 are overwritten below)
+ usad8 r5, r7, lr ; sum of positive differences (unused)
+ usad8 r6, r8, lr ; sum of negative differences (unused)
+ orr r8, r8, r7 ; absolute differences of all 4 pixels
+
+ ldr r5, [r0, #0xc] ; load last 4 src pixels of the row
+
+ ; calculate sse
+ uxtb16 r6, r8 ; bytes 0 and 2 (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; bytes 1 and 3 (other two pixels) to halfwords
+ smlad r4, r6, r6, r4 ; sse += square of each halfword in r6
+
+ ; 4th 4 pixels
+ ldr r6, [r2, #0xc] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; sse += square of each halfword in r7
+
+ usub8 r8, r5, r6 ; per-byte src - ref (sets GE flags per byte)
+ add r0, r0, r1 ; set src_ptr to next row
+ sel r7, r8, lr ; keep bytes where src >= ref, zero elsewhere
+ usub8 r9, r6, r5 ; per-byte ref - src (sets GE flags per byte)
+ add r2, r2, r3 ; set ref_ptr to next row
+ sel r8, r9, lr ; keep bytes where ref >= src, zero elsewhere
+
+ ; partial sums: results are never read before being overwritten
+ usad8 r5, r7, lr ; sum of positive differences (unused)
+ usad8 r6, r8, lr ; sum of negative differences (unused)
+ orr r8, r8, r7 ; absolute differences of all 4 pixels
+
+ subs r12, r12, #1 ; next row (sets Z flag for the bne below)
+
+ ; calculate sse
+ uxtb16 r6, r8 ; bytes 0 and 2 (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; bytes 1 and 3 (other two pixels) to halfwords
+ smlad r4, r6, r6, r4 ; sse += square of each halfword in r6
+ smlad r4, r7, r7, r4 ; sse += square of each halfword in r7
+
+ bne loop
+
+ ; return stuff
+ ldr r1, [sp, #28] ; get address of sse (stack arg, above the 28 bytes pushed)
+ mov r0, r4 ; return sse
+ str r4, [r1] ; store sse
+
+ pop {r4-r9, pc} ; restore registers and return
+
+ ENDP
+
+ END
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -20,6 +20,7 @@
extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);
+extern prototype_variance(vp8_mse16x16_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
@@ -31,6 +32,9 @@
#undef vp8_variance_var16x16
#define vp8_variance_var16x16 vp8_variance16x16_armv6
+
+#undef vp8_variance_mse16x16
+#define vp8_variance_mse16x16 vp8_mse16x16_armv6
#undef vp8_variance_halfpixvar16x16_h
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -37,6 +37,7 @@
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM)
#File list for neon
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -332,7 +332,7 @@
* extended in one of two ways. First, a second, algorithm specific structure
* can be allocated and the priv member pointed to it. Alternatively, this
* structure can be made the first member of the algorithm specific structure,
- * and the pointer casted to the proper type.
+ * and the pointer cast to the proper type.
*/
struct vpx_codec_priv
{
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -168,15 +168,10 @@
%macro GET_GOT 1
push %1
call %%get_got
- %%sub_offset:
- jmp %%exitGG
%%get_got:
- mov %1, [esp]
- add %1, fake_got - %%sub_offset
- ret
- %%exitGG:
+ pop %1 ; %1 = return address pushed by the call, i.e. runtime address of %%get_got
%undef GLOBAL
- %define GLOBAL(x) x + %1 - fake_got
+ %define GLOBAL(x) x + %1 - %%get_got
%undef RESTORE_GOT
%define RESTORE_GOT pop %1
%endmacro
@@ -289,7 +284,6 @@
%elifidn __OUTPUT_FORMAT__,macho32
%macro SECTION_RODATA 0
section .text
-fake_got:
%endmacro
%else
%define SECTION_RODATA section .rodata