shithub: libvpx

--- a/build/make/obj_int_extract.c

+++ b/build/make/obj_int_extract.c

@@ -918,15 +918,23 @@

                 char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};

                 strncpy(name, ptr, 8);

                 //log_msg("COFF: Parsing symbol %s\n",name);

-                /* +1 to avoid printing leading underscore */

-                printf("%-40s EQU ", name + 1);

+                /* The 64bit Windows compiler doesn't prefix with an _.

+                 * Check what's there, and bump if necessary

+                 */

+                if (name[0] == '_')

+                    printf("%-40s EQU ", name + 1);

+                else

+                    printf("%-40s EQU ", name);

             else

                 //log_msg("COFF: Parsing symbol %s\n",

                 //        buf + strtab_ptr + get_le32(ptr+4));

-                /* +1 to avoid printing leading underscore */

-                printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4) + 1);

+                if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')

+                    printf("%-40s EQU ",

+                           buf + strtab_ptr + get_le32(ptr + 4) + 1);

+                else

+                    printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));

             if (!(strcmp(sectionlist[section-1], ".bss")))

--- a/vp8/decoder/threading.c

+++ b/vp8/decoder/threading.c

@@ -296,18 +296,6 @@

-                        if(pbi->common.filter_level)

-                        {

-                            /*update loopfilter info*/

-                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;

-                            filter_level = pbi->mt_baseline_filter_level[Segment];

-                            /* Distance of Mb to the various image edges.

-                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units

-                             * Apply any context driven MB level adjustment

-                             */

-                            filter_level = vp8_adjust_mb_lf_value(xd, filter_level);

-                        }

                         /* Distance of Mb to the various image edges.

                          * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units

                          */

@@ -362,7 +350,16 @@

-                          /* loopfilter on this macroblock. */

+                            /* update loopfilter info */

+                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;

+                            filter_level = pbi->mt_baseline_filter_level[Segment];

+                            /* Distance of Mb to the various image edges.

+                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units

+                             * Apply any context driven MB level adjustment

+                             */

+                            filter_level = vp8_adjust_mb_lf_value(xd, filter_level);

+                            /* loopfilter on this macroblock. */

                             if (filter_level)

                                 if (mb_col > 0)

@@ -778,18 +775,6 @@

-                if(pbi->common.filter_level)

-                {

-                    /* update loopfilter info */

-                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;

-                    filter_level = pbi->mt_baseline_filter_level[Segment];

-                    /* Distance of Mb to the various image edges.

-                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units

-                     * Apply any context driven MB level adjustment

-                     */

-                    filter_level = vp8_adjust_mb_lf_value(xd, filter_level);

-                }

                 /* Distance of Mb to the various image edges.

                  * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units

                  */

@@ -852,6 +837,15 @@

+                    /* update loopfilter info */

+                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;

+                    filter_level = pbi->mt_baseline_filter_level[Segment];

+                    /* Distance of Mb to the various image edges.

+                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units

+                     * Apply any context driven MB level adjustment

+                     */

+                    filter_level = vp8_adjust_mb_lf_value(xd, filter_level);

                     /* loopfilter on this macroblock. */

                     if (filter_level)

--- a/vp8/encoder/arm/arm_csystemdependent.c

+++ b/vp8/encoder/arm/arm_csystemdependent.c

@@ -50,8 +50,8 @@

         cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_armv6;

         cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_armv6;

-        /*cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;

-        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

+        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_armv6;

+        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/

         /*cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;

         cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;

--- /dev/null

+++ b/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm

@@ -1,0 +1,133 @@

+;

+;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.

+;

+;  Use of this source code is governed by a BSD-style license

+;  that can be found in the LICENSE file in the root of the source

+;  tree. An additional intellectual property rights grant can be found

+;  in the file PATENTS.  All contributing project authors may

+;  be found in the AUTHORS file in the root of the source tree.

+;

+    EXPORT  |vp8_mse16x16_armv6|

+    ARM

+    AREA ||.text||, CODE, READONLY, ALIGN=2

+; r0    unsigned char *src_ptr

+; r1    int source_stride

+; r2    unsigned char *ref_ptr

+; r3    int  recon_stride

+; stack unsigned int *sse

+; returns r0 = sum of squared differences over the 16x16 block (also stored to *sse)

+;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.

+;      So that part of the calculation can be removed (the usad8 results below are dead).

+|vp8_mse16x16_armv6| PROC

+    push    {r4-r9, lr}

+    mov     r12, #16            ; set loop counter to 16 (=block height)

+    mov     r4, #0              ; initialize sse = 0

+loop

+    ; 1st 4 pixels

+    ldr     r5, [r0, #0x0]      ; load 4 src pixels

+    ldr     r6, [r2, #0x0]      ; load 4 ref pixels

+    mov     lr, #0              ; constant zero

+    usub8   r8, r5, r6          ; calculate difference

+    sel     r7, r8, lr          ; select bytes with positive difference

+    usub8   r9, r6, r5          ; calculate difference with reversed operands

+    sel     r8, r9, lr          ; select bytes with negative difference

+    ; calculate partial sums

+    usad8   r5, r7, lr          ; sum of positive differences (unused: never read before being overwritten)

+    usad8   r6, r8, lr          ; sum of negative differences (unused: never read before being overwritten)

+    orr     r8, r8, r7          ; differences of all 4 pixels

+    ldr     r5, [r0, #0x4]      ; load 4 src pixels

+    ; calculate sse

+    uxtb16  r6, r8              ; byte (two pixels) to halfwords

+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords

+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

+    ; 2nd 4 pixels

+    ldr     r6, [r2, #0x4]      ; load 4 ref pixels

+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

+    usub8   r8, r5, r6          ; calculate difference

+    sel     r7, r8, lr          ; select bytes with positive difference

+    usub8   r9, r6, r5          ; calculate difference with reversed operands

+    sel     r8, r9, lr          ; select bytes with negative difference

+    ; calculate partial sums

+    usad8   r5, r7, lr          ; sum of positive differences (unused: never read before being overwritten)

+    usad8   r6, r8, lr          ; sum of negative differences (unused: never read before being overwritten)

+    orr     r8, r8, r7          ; differences of all 4 pixels

+    ldr     r5, [r0, #0x8]      ; load 4 src pixels

+    ; calculate sse

+    uxtb16  r6, r8              ; byte (two pixels) to halfwords

+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords

+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

+    ; 3rd 4 pixels

+    ldr     r6, [r2, #0x8]      ; load 4 ref pixels

+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

+    usub8   r8, r5, r6          ; calculate difference

+    sel     r7, r8, lr          ; select bytes with positive difference

+    usub8   r9, r6, r5          ; calculate difference with reversed operands

+    sel     r8, r9, lr          ; select bytes with negative difference

+    ; calculate partial sums

+    usad8   r5, r7, lr          ; sum of positive differences (unused: never read before being overwritten)

+    usad8   r6, r8, lr          ; sum of negative differences (unused: never read before being overwritten)

+    orr     r8, r8, r7          ; differences of all 4 pixels

+    ldr     r5, [r0, #0xc]      ; load 4 src pixels

+    ; calculate sse

+    uxtb16  r6, r8              ; byte (two pixels) to halfwords

+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords

+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

+    ; 4th 4 pixels

+    ldr     r6, [r2, #0xc]      ; load 4 ref pixels

+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

+    usub8   r8, r5, r6          ; calculate difference

+    add     r0, r0, r1          ; set src_ptr to next row

+    sel     r7, r8, lr          ; select bytes with positive difference

+    usub8   r9, r6, r5          ; calculate difference with reversed operands

+    add     r2, r2, r3          ; set ref_ptr to next row

+    sel     r8, r9, lr          ; select bytes with negative difference

+    ; calculate partial sums

+    usad8   r5, r7, lr          ; sum of positive differences (unused: never read before being overwritten)

+    usad8   r6, r8, lr          ; sum of negative differences (unused: never read before being overwritten)

+    orr     r8, r8, r7          ; differences of all 4 pixels

+    subs    r12, r12, #1        ; next row

+    ; calculate sse

+    uxtb16  r6, r8              ; byte (two pixels) to halfwords

+    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords

+    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)

+    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)

+    bne     loop

+    ; return stuff

+    ldr     r1, [sp, #28]       ; get address of sse (stack arg sits above the 7 pushed registers = 28 bytes)

+    mov     r0, r4              ; return sse

+    str     r4, [r1]            ; store sse to *sse

+    pop     {r4-r9, pc}

+    ENDP

+    END

--- a/vp8/encoder/arm/variance_arm.h

+++ b/vp8/encoder/arm/variance_arm.h

@@ -20,6 +20,7 @@

 extern prototype_variance(vp8_variance_halfpixvar16x16_h_armv6);

 extern prototype_variance(vp8_variance_halfpixvar16x16_v_armv6);

 extern prototype_variance(vp8_variance_halfpixvar16x16_hv_armv6);

+extern prototype_variance(vp8_mse16x16_armv6);

 #if !CONFIG_RUNTIME_CPU_DETECT

@@ -31,6 +32,9 @@

 #undef  vp8_variance_var16x16

 #define vp8_variance_var16x16 vp8_variance16x16_armv6

+#undef  vp8_variance_mse16x16

+#define vp8_variance_mse16x16 vp8_mse16x16_armv6

 #undef  vp8_variance_halfpixvar16x16_h

 #define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6

--- a/vp8/vp8cx_arm.mk

+++ b/vp8/vp8cx_arm.mk

@@ -37,6 +37,7 @@

 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)

 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_sad16x16_armv6$(ASM)

 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM)

+VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)

 VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/armv6/walsh_v6$(ASM)

 #File list for neon

--- a/vpx/internal/vpx_codec_internal.h

+++ b/vpx/internal/vpx_codec_internal.h

@@ -332,7 +332,7 @@

  * extended in one of two ways. First, a second, algorithm specific structure

  * can be allocated and the priv member pointed to it. Alternatively, this

  * structure can be made the first member of the algorithm specific structure,

- * and the pointer casted to the proper type.

+ * and the pointer cast to the proper type.

  */

 struct vpx_codec_priv

--- a/vpx_ports/x86_abi_support.asm

+++ b/vpx_ports/x86_abi_support.asm

@@ -168,15 +168,10 @@

     %macro GET_GOT 1

       push %1

       call %%get_got

-      %%sub_offset:

-      jmp  %%exitGG

       %%get_got:

-      mov  %1, [esp]

-      add %1, fake_got - %%sub_offset

-      ret

-      %%exitGG:

+      pop  %1

       %undef GLOBAL

-      %define GLOBAL(x) x + %1 - fake_got

+      %define GLOBAL(x) x + %1 - %%get_got

       %undef RESTORE_GOT

       %define RESTORE_GOT pop %1

     %endmacro

@@ -289,7 +284,6 @@

 %elifidn __OUTPUT_FORMAT__,macho32

 %macro SECTION_RODATA 0

 section .text

-fake_got:

 %endmacro

 %else

 %define SECTION_RODATA section .rodata

--

⑨