shithub: libvpx

Download patch

ref: 15ea268d629544885db363dfb5d1609404d5e9b0
parent: b79879c2e3ba79c0b4693b340756283e58dba3da
parent: 97259b460c5f9ca154b81e0b7251a0c81422e4b5
author: Johann <johannkoenig@google.com>
date: Wed Nov 23 03:44:21 EST 2011

Merge "Fix encoder partitioned output on ARM"

--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -13,6 +13,7 @@
     EXPORT |vp8_encode_bool|
     EXPORT |vp8_stop_encode|
     EXPORT |vp8_encode_value|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -22,6 +23,20 @@
 
     AREA    |.text|, CODE, READONLY
 
+    ; macro for validating write buffer position: vp8_validate_buffer_arm(start, pos, buffer_end, error)
+    ; needs vp8_writer in r0 (buffer_end and error are loaded through it)
+    ; start shall not be in r1-r3 and pos shall not be in r2-r3: they are overwritten before being read
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]   ; 3rd argument (end)
+    ldr  r3, [r0, #vp8_writer_error]        ; 4th argument (error)
+    mov  r1, $pos                ; 2nd argument (len); overwrites r1, hence the restriction on start
+    mov  r0, $start              ; 1st argument (start); writer pointer in r0 is gone until the pop
+    bl   vp8_validate_buffer_arm ; the value returned in r0 is discarded by the pop below
+    pop  {r0-r3, r12, lr}        ; restore caller state, including the writer pointer in r0
+    MEND
+
 ; r0 BOOL_CODER *br
 ; r1 unsigned char *source
 ; r2 unsigned char *source_end
@@ -43,7 +58,7 @@
 ; r1 int bit
 ; r2 int probability
 |vp8_encode_bool| PROC
-    push    {r4-r9, lr}
+    push    {r4-r10, lr}
 
     mov     r4, r2
 
@@ -106,6 +121,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r1, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r9, r1                 ; validate_buffer at pos
+
     strb    r7, [r9, r4]                ; w->buffer[w->pos++]
 
 token_count_lt_zero
@@ -114,7 +132,7 @@
     str     r2, [r0, #vp8_writer_lowvalue]
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
-    pop     {r4-r9, pc}
+    pop     {r4-r10, pc}
     ENDP
 
 ; r0 BOOL_CODER *br
@@ -179,6 +197,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r1, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r9, r1                 ; validate_buffer at pos
+
     strb    r7, [r9, r4]                ; w->buffer[w->pos++]
 
 token_count_lt_zero_se
@@ -198,7 +219,7 @@
 ; r1 int data
 ; r2 int bits
 |vp8_encode_value| PROC
-    push    {r4-r11, lr}
+    push    {r4-r12, lr}
 
     mov     r10, r2
 
@@ -270,6 +291,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r9, r11                ; validate_buffer at pos
+
     strb    r7, [r9, r4]                ; w->buffer[w->pos++]
 
 token_count_lt_zero_ev
@@ -281,7 +305,7 @@
     str     r2, [r0, #vp8_writer_lowvalue]
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
-    pop     {r4-r11, pc}
+    pop     {r4-r12, pc}
     ENDP
 
     END
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -10,6 +10,7 @@
 
 
     EXPORT |vp8cx_pack_tokens_armv5|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -19,6 +20,22 @@
 
     AREA    |.text|, CODE, READONLY
 
+
+    ; macro for validating write buffer position: vp8_validate_buffer_arm(start, pos, buffer_end, error)
+    ; needs vp8_writer in r0 (buffer_end and error are loaded through it)
+    ; start shall not be in r1-r3 and pos shall not be in r2-r3: they are overwritten before being read
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]   ; 3rd argument (end)
+    ldr  r3, [r0, #vp8_writer_error]        ; 4th argument (error)
+    mov  r1, $pos                ; 2nd argument (len); overwrites r1, hence the restriction on start
+    mov  r0, $start              ; 1st argument (start); writer pointer in r0 is gone until the pop
+    bl   vp8_validate_buffer_arm ; the value returned in r0 is discarded by the pop below
+    pop  {r0-r3, r12, lr}        ; restore caller state, including the writer pointer in r0
+    MEND
+
+
 ; r0 vp8_writer *w
 ; r1 const TOKENEXTRA *p
 ; r2 int xcount
@@ -26,11 +43,11 @@
 ; s0 vp8_extra_bits
 ; s1 vp8_coef_tree
 |vp8cx_pack_tokens_armv5| PROC
-    push    {r4-r11, lr}
+    push    {r4-r12, lr}
+    sub     sp, sp, #16
 
     ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
     ;  sizeof (TOKENEXTRA) is 8
-    sub     sp, sp, #12
     add     r2, r1, r2, lsl #3          ; stop = p + xcount*sizeof(TOKENEXTRA)
     str     r2, [sp, #0]
     str     r3, [sp, #8]                ; save vp8_coef_encodings
@@ -57,7 +74,7 @@
     subne   r8, r8, #1                  ; --n
 
     rsb     r4, r8, #32                 ; 32-n
-    ldr     r10, [sp, #52]              ; vp8_coef_tree
+    ldr     r10, [sp, #60]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
     lsl     r12, r6, r4                ; r12 = v << 32 - n
@@ -128,12 +145,15 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
     ; r10 is used earlier in the loop, but r10 is used as
     ; temp variable here.  So after r10 is used, reload
     ; vp8_coef_tree_dcd into r10
-    ldr     r10, [sp, #52]              ; vp8_coef_tree
+    ldr     r10, [sp, #60]              ; vp8_coef_tree
 
 token_count_lt_zero
     lsl     r2, r2, r6                  ; lowvalue <<= shift
@@ -142,7 +162,7 @@
     bne     token_loop
 
     ldrb    r6, [r1, #tokenextra_token] ; t
-    ldr     r7, [sp, #48]               ; vp8_extra_bits
+    ldr     r7, [sp, #56]               ; vp8_extra_bits
     ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
     ;  element.  Here vp8_extra_bit_struct == 16
     add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t
@@ -223,6 +243,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
     ldr     r10, [sp, #4]               ; b->tree
 extra_count_lt_zero
@@ -271,7 +294,10 @@
     lsr     r6, r2, #24                 ; lowvalue >> 24
     add     r12, r4, #1                 ; w->pos++
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
-    str     r12, [r0, #0x10]
+    str     r12, [r0, #vp8_writer_pos]
+
+    VALIDATE_POS r7, r12               ; validate_buffer at pos
+
     strb    r6, [r7, r4]
 end_count_zero
 skip_extra_bits
@@ -284,8 +310,8 @@
     str     r2, [r0, #vp8_writer_lowvalue]
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
-    add     sp, sp, #12
-    pop     {r4-r11, pc}
+    add     sp, sp, #16
+    pop     {r4-r12, pc}
     ENDP
 
     END
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -10,6 +10,7 @@
 
 
     EXPORT |vp8cx_pack_mb_row_tokens_armv5|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -19,6 +20,21 @@
 
     AREA    |.text|, CODE, READONLY
 
+
+    ; macro for validating write buffer position: vp8_validate_buffer_arm(start, pos, buffer_end, error)
+    ; needs vp8_writer in r0 (buffer_end and error are loaded through it)
+    ; start shall not be in r1-r3 and pos shall not be in r2-r3: they are overwritten before being read
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]   ; 3rd argument (end)
+    ldr  r3, [r0, #vp8_writer_error]        ; 4th argument (error)
+    mov  r1, $pos                ; 2nd argument (len); overwrites r1, hence the restriction on start
+    mov  r0, $start              ; 1st argument (start); writer pointer in r0 is gone until the pop
+    bl   vp8_validate_buffer_arm ; the value returned in r0 is discarded by the pop below
+    pop  {r0-r3, r12, lr}        ; restore caller state, including the writer pointer in r0
+    MEND
+
 ; r0 VP8_COMP *cpi
 ; r1 vp8_writer *w
 ; r2 vp8_coef_encodings
@@ -26,7 +42,7 @@
 ; s0 vp8_coef_tree
 
 |vp8cx_pack_mb_row_tokens_armv5| PROC
-    push    {r4-r11, lr}
+    push    {r4-r12, lr}
     sub     sp, sp, #24
 
     ; Compute address of cpi->common.mb_rows
@@ -79,7 +95,7 @@
     subne   r8, r8, #1                  ; --n
 
     rsb     r4, r8, #32                 ; 32-n
-    ldr     r10, [sp, #60]              ; vp8_coef_tree
+    ldr     r10, [sp, #64]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
     lsl     r12, r6, r4                 ; r12 = v << 32 - n
@@ -150,12 +166,15 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
     ; r10 is used earlier in the loop, but r10 is used as
     ; temp variable here.  So after r10 is used, reload
     ; vp8_coef_tree_dcd into r10
-    ldr     r10, [sp, #60]              ; vp8_coef_tree
+    ldr     r10, [sp, #64]              ; vp8_coef_tree
 
 token_count_lt_zero
     lsl     r2, r2, r6                  ; lowvalue <<= shift
@@ -245,6 +264,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
     ldr     r10, [sp, #4]               ; b->tree
 extra_count_lt_zero
@@ -293,7 +315,10 @@
     lsr     r6, r2, #24                 ; lowvalue >> 24
     add     r12, r4, #1                 ; w->pos++
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
-    str     r12, [r0, #0x10]
+    str     r12, [r0, #vp8_writer_pos]
+
+    VALIDATE_POS r7, r12               ; validate_buffer at pos
+
     strb    r6, [r7, r4]
 end_count_zero
 skip_extra_bits
@@ -314,7 +339,7 @@
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
     add     sp, sp, #24
-    pop     {r4-r11, pc}
+    pop     {r4-r12, pc}
     ENDP
 
 _VP8_COMP_common_
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -10,6 +10,7 @@
 
 
     EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -19,17 +20,31 @@
 
     AREA    |.text|, CODE, READONLY
 
+    ; macro for validating write buffer position: vp8_validate_buffer_arm(start, pos, buffer_end, error)
+    ; needs vp8_writer in r0 (buffer_end and error are loaded through it)
+    ; start shall not be in r1-r3 and pos shall not be in r2-r3: they are overwritten before being read
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]   ; 3rd argument (end)
+    ldr  r3, [r0, #vp8_writer_error]        ; 4th argument (error)
+    mov  r1, $pos                ; 2nd argument (len); overwrites r1, hence the restriction on start
+    mov  r0, $start              ; 1st argument (start); writer pointer in r0 is gone until the pop
+    bl   vp8_validate_buffer_arm ; the value returned in r0 is discarded by the pop below
+    pop  {r0-r3, r12, lr}        ; restore caller state, including the writer pointer in r0
+    MEND
+
 ; r0 VP8_COMP *cpi
 ; r1 unsigned char *cx_data
-; r2 int num_part
-; r3 *size
+; r2 const unsigned char *cx_data_end
+; r3 int num_part
 ; s0 vp8_coef_encodings
 ; s1 vp8_extra_bits,
-; s2 const vp8_tree_index *,
+; s2 const vp8_tree_index *
 
 |vp8cx_pack_tokens_into_partitions_armv5| PROC
-    push    {r4-r11, lr}
-    sub     sp, sp, #44
+    push    {r4-r12, lr}
+    sub     sp, sp, #40
 
     ; Compute address of cpi->common.mb_rows
     ldr     r4, _VP8_COMP_common_
@@ -39,24 +54,16 @@
     ldr     r5, [r4, r6]                ; load up mb_rows
 
     str     r5, [sp, #36]               ; save mb_rows
-    str     r1, [sp, #24]               ; save cx_data
-    str     r2, [sp, #20]               ; save num_part
-    str     r3, [sp, #8]                ; save *size
+    str     r1, [sp, #24]               ; save ptr = cx_data
+    str     r3, [sp, #20]               ; save num_part
+    str     r2, [sp, #8]                ; save cx_data_end
 
-    ; *size = 3*(num_part -1 );
-    sub     r2, r2, #1                  ; num_part - 1
-    add     r2, r2, r2, lsl #1          ; 3*(num_part - 1)
-    str     r2, [r3]
-
-    add     r2, r2, r1                  ; cx_data + *size
-    str     r2, [sp, #40]               ; ptr
-
     ldr     r4, _VP8_COMP_tplist_
     add     r4, r0, r4
     ldr     r7, [r4, #0]                ; dereference cpi->tp_list
     str     r7, [sp, #32]               ; store start of cpi->tp_list
 
-    ldr     r11, _VP8_COMP_bc2_         ; load up vp8_writer out of cpi
+    ldr     r11, _VP8_COMP_bc_          ; load up vp8_writer out of cpi
     add     r0, r0, r11
 
     mov     r11, #0
@@ -63,7 +70,10 @@
     str     r11, [sp, #28]              ; i
 
 numparts_loop
-    ldr     r10, [sp, #40]              ; ptr
+    ldr     r2, _vp8_writer_sz_         ; load up sizeof(vp8_writer)
+    add     r0, r2                      ; bc[i + 1]
+
+    ldr     r10, [sp, #24]              ; ptr
     ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
     subs    r5, r5, r11                 ; move start point with each partition
                                         ; mb_rows starts at i
@@ -72,6 +82,10 @@
     ; Reset all of the VP8 Writer data for each partition that
     ; is processed.
     ; start_encode
+
+    ldr     r3, [sp, #8]
+    str     r3, [r0, #vp8_writer_buffer_end]
+
     mov     r2, #0                      ; vp8_writer_lowvalue
     mov     r5, #255                    ; vp8_writer_range
     mvn     r3, #23                     ; vp8_writer_count
@@ -182,6 +196,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
     ; r10 is used earlier in the loop, but r10 is used as
@@ -277,6 +294,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
     ldr     r10, [sp, #4]               ; b->tree
 extra_count_lt_zero
@@ -320,12 +340,15 @@
     bne     end_count_zero
 
     ldr     r4, [r0, #vp8_writer_pos]
-    mvn     r3, #7
+    mvn     r3, #7                      ; count = -8
     ldr     r7, [r0, #vp8_writer_buffer]
     lsr     r6, r2, #24                 ; lowvalue >> 24
     add     r12, r4, #1                 ; w->pos++
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
-    str     r12, [r0, #0x10]
+    str     r12, [r0, #vp8_writer_pos]
+
+    VALIDATE_POS r7, r12                ; validate_buffer at pos
+
     strb    r6, [r7, r4]
 end_count_zero
 skip_extra_bits
@@ -401,6 +424,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
 token_count_lt_zero_se
@@ -409,34 +435,11 @@
     subs    r12, r12, #1
     bne     stop_encode_loop
 
-    ldr     r10, [sp, #8]               ; *size
-    ldr     r11, [r10]
     ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
-    add     r11, r11, r4                ; *size += w->pos
-    str     r11, [r10]
-
-    ldr     r9, [sp, #20]               ; num_parts
-    sub     r9, r9, #1
-    ldr     r10, [sp, #28]              ; i
-    cmp     r10, r9                     ; if(i<(num_part - 1))
-    bge     skip_write_partition
-
-    ldr     r12, [sp, #40]              ; ptr
+    ldr     r12, [sp, #24]              ; ptr
     add     r12, r12, r4                ; ptr += w->pos
-    str     r12, [sp, #40]
+    str     r12, [sp, #24]
 
-    ldr     r9, [sp, #24]               ; cx_data
-    mov     r8, r4, asr #8
-    strb    r4, [r9, #0]
-    strb    r8, [r9, #1]
-    mov     r4, r4, asr #16
-    strb    r4, [r9, #2]
-
-    add     r9, r9, #3                  ; cx_data += 3
-    str     r9, [sp, #24]
-
-skip_write_partition
-
     ldr     r11, [sp, #28]              ; i
     ldr     r10, [sp, #20]              ; num_parts
 
@@ -451,9 +454,8 @@
     cmp     r10, r11
     bgt     numparts_loop
 
-
-    add     sp, sp, #44
-    pop     {r4-r11, pc}
+    add     sp, sp, #40
+    pop     {r4-r12, pc}
     ENDP
 
 _VP8_COMP_common_
@@ -462,7 +464,9 @@
     DCD     vp8_common_mb_rows
 _VP8_COMP_tplist_
     DCD     vp8_comp_tplist
-_VP8_COMP_bc2_
-    DCD     vp8_comp_bc2
+_VP8_COMP_bc_
+    DCD     vp8_comp_bc
+_vp8_writer_sz_
+    DCD     vp8_writer_sz
 
     END
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -10,7 +10,7 @@
 
 
 #include "vp8/encoder/boolhuff.h"
-#include "vp8/common/blockd.h"
+#include "vpx/internal/vpx_codec_internal.h"
 
 const unsigned int vp8_prob_cost[256] =
 {
@@ -32,3 +32,10 @@
     22,   21,   19,   18,   16,   15,   13,   12,   10,    9,    7,    6,    4,    3,    1,   1
 };
 
+int vp8_validate_buffer_arm(const unsigned char *start,
+                            size_t               len,
+                            const unsigned char *end,
+                            struct vpx_internal_error_info *error)
+{   /* linkable entry point that the armv5te assembly reaches with "bl" from its VALIDATE_POS macro */
+    return validate_buffer(start, len, end, error);   /* arguments are forwarded unchanged */
+}
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -50,6 +50,7 @@
 DEFINE(vp8_writer_pos,                          offsetof(vp8_writer, pos));
 DEFINE(vp8_writer_buffer,                       offsetof(vp8_writer, buffer));
 DEFINE(vp8_writer_buffer_end,                   offsetof(vp8_writer, buffer_end));
+DEFINE(vp8_writer_error,                        offsetof(vp8_writer, error));
 
 DEFINE(tokenextra_token,                        offsetof(TOKENEXTRA, Token));
 DEFINE(tokenextra_extra,                        offsetof(TOKENEXTRA, Extra));
@@ -69,7 +70,8 @@
 
 DEFINE(vp8_comp_tplist,                         offsetof(VP8_COMP, tplist));
 DEFINE(vp8_comp_common,                         offsetof(VP8_COMP, common));
-DEFINE(vp8_comp_bc2,                            offsetof(VP8_COMP, bc2));
+DEFINE(vp8_comp_bc ,                            offsetof(VP8_COMP, bc));
+DEFINE(vp8_writer_sz ,                          sizeof(vp8_writer));
 
 DEFINE(tokenlist_start,                         offsetof(TOKENLIST, start));
 DEFINE(tokenlist_stop,                          offsetof(TOKENLIST, stop));
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -109,7 +109,7 @@
 {
     VP8_COMMON *const x = & cpi->common;
 
-    vp8_writer *const w = & cpi->bc;
+    vp8_writer *const w = cpi->bc;
 
     {
         vp8_prob Pnew   [VP8_YMODES-1];
@@ -374,7 +374,9 @@
 
 }
 
-static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, unsigned char * cx_data_end, int num_part, int *size)
+static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
+                                          unsigned char * cx_data_end,
+                                          int num_part)
 {
 
     int i;
@@ -381,13 +383,12 @@
     unsigned char *ptr = cx_data;
     unsigned char *ptr_end = cx_data_end;
     unsigned int shift;
-    vp8_writer *w = &cpi->bc2;
-    *size = 3 * (num_part - 1);
-    cpi->partition_sz[0] += *size;
-    ptr = cx_data + (*size);
+    vp8_writer *w;
+    ptr = cx_data;
 
     for (i = 0; i < num_part; i++)
     {
+        w = cpi->bc + i + 1;
         vp8_start_encode(w, ptr, ptr_end);
         {
             unsigned int split;
@@ -597,17 +598,7 @@
         }
 
         vp8_stop_encode(w);
-        *size +=   w->pos;
-
-        /* The first partition size is set earlier */
-        cpi->partition_sz[i + 1] = w->pos;
-
-        if (i < (num_part - 1))
-        {
-            write_partition_size(cx_data, w->pos);
-            cx_data += 3;
-            ptr += w->pos;
-        }
+        ptr += w->pos;
     }
 }
 
@@ -892,7 +883,7 @@
 static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 {
     VP8_COMMON *const pc = & cpi->common;
-    vp8_writer *const w = & cpi->bc;
+    vp8_writer *const w = cpi->bc;
     const MV_CONTEXT *mvc = pc->fc.mvc;
 
     const int *const rfct = cpi->count_mb_ref_frame_usage;
@@ -1107,7 +1098,7 @@
 
 static void write_kfmodes(VP8_COMP *cpi)
 {
-    vp8_writer *const bc = & cpi->bc;
+    vp8_writer *const bc = cpi->bc;
     const VP8_COMMON *const c = & cpi->common;
     /* const */
     MODE_INFO *m = c->mi;
@@ -1437,7 +1428,7 @@
 static void update_coef_probs(VP8_COMP *cpi)
 {
     int i = 0;
-    vp8_writer *const w = & cpi->bc;
+    vp8_writer *const w = cpi->bc;
     int savings = 0;
 
     vp8_clear_system_state(); //__asm emms;
@@ -1583,7 +1574,7 @@
     int i, j;
     VP8_HEADER oh;
     VP8_COMMON *const pc = & cpi->common;
-    vp8_writer *const bc = & cpi->bc;
+    vp8_writer *const bc = cpi->bc;
     MACROBLOCKD *const xd = & cpi->mb.e_mbd;
     int extra_bytes_packed = 0;
 
@@ -1598,8 +1589,7 @@
 
     mb_feature_data_bits = vp8_mb_feature_data_bits;
 
-    cpi->bc.error = &pc->error;
-    cpi->bc2.error = &pc->error;
+    bc[0].error = &pc->error;
 
     validate_buffer(cx_data, 3, cx_data_end, &cpi->common.error);
     cx_data += 3;
@@ -1879,8 +1869,10 @@
 
     vp8_stop_encode(bc);
 
-    oh.first_partition_length_in_bytes = cpi->bc.pos;
+    cx_data += bc->pos;
 
+    oh.first_partition_length_in_bytes = cpi->bc->pos;
+
     /* update frame tag */
     {
         int v = (oh.first_partition_length_in_bytes << 5) |
@@ -1893,34 +1885,58 @@
         dest[2] = v >> 16;
     }
 
-    *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc.pos;
+    *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc->pos;
+
     cpi->partition_sz[0] = *size;
 
     if (pc->multi_token_partition != ONE_PARTITION)
     {
-        int num_part;
-        int asize;
-        num_part = 1 << pc->multi_token_partition;
+        int num_part = 1 << pc->multi_token_partition;
 
-        pack_tokens_into_partitions(cpi, cx_data + bc->pos, cx_data_end, num_part, &asize);
+        /* partition size table at the end of first partition */
+        cpi->partition_sz[0] += 3 * (num_part - 1);
+        *size += 3 * (num_part - 1);
 
-        *size += asize;
+        validate_buffer(cx_data, 3 * (num_part - 1), cx_data_end,
+                        &pc->error);
+
+        for(i = 1; i < num_part + 1; i++)
+        {
+            cpi->bc[i].error = &pc->error;
+        }
+
+        pack_tokens_into_partitions(cpi, cx_data + 3 * (num_part - 1),
+                                    cx_data_end, num_part);
+
+        for(i = 1; i < num_part; i++)
+        {
+            cpi->partition_sz[i] = cpi->bc[i].pos;
+            write_partition_size(cx_data, cpi->partition_sz[i]);
+            cx_data += 3;
+            *size += cpi->partition_sz[i]; /* add to total */
+        }
+
+        /* add last partition to total size */
+        cpi->partition_sz[i] = cpi->bc[i].pos;
+        *size += cpi->partition_sz[i];
     }
     else
     {
-        vp8_start_encode(&cpi->bc2, cx_data + bc->pos, cx_data_end);
+        bc[1].error = &pc->error;
 
+        vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);
+
 #if CONFIG_MULTITHREAD
         if (cpi->b_multi_threaded)
-            pack_mb_row_tokens(cpi, &cpi->bc2);
+            pack_mb_row_tokens(cpi, &cpi->bc[1]);
         else
 #endif
-            pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count);
+            pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
 
-        vp8_stop_encode(&cpi->bc2);
+        vp8_stop_encode(&cpi->bc[1]);
 
-        *size += cpi->bc2.pos;
-        cpi->partition_sz[1] = cpi->bc2.pos;
+        *size += cpi->bc[1].pos;
+        cpi->partition_sz[1] = cpi->bc[1].pos;
     }
 }
 
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -17,10 +17,13 @@
                              vp8_token *,
                              vp8_extra_bit_struct *,
                              const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
-        vp8_token *,
-        vp8_extra_bit_struct *,
-        const vp8_tree_index *);
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
+                                             unsigned char * cx_data,
+                                             const unsigned char *cx_data_end,
+                                             int num_parts,
+                                             vp8_token *,
+                                             vp8_extra_bit_struct *,
+                                             const vp8_tree_index *);
 void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
                                     vp8_token *,
                                     vp8_extra_bit_struct *,
@@ -27,13 +30,14 @@
                                     const vp8_tree_index *);
 # define pack_tokens(a,b,c)                  \
     vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_tokens_into_partitions(a,b,unused,c,d)  \
+# define pack_tokens_into_partitions(a,b,c,d)  \
     vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 # define pack_mb_row_tokens(a,b)               \
     vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 #else
-# define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
-# define pack_tokens_into_partitions(a,b,c,d,e)  pack_tokens_into_partitions_c(a,b,c,d,e)
+# define pack_tokens(a,b,c)                    pack_tokens_c(a,b,c)
+# define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
 # define pack_mb_row_tokens(a,b)               pack_mb_row_tokens_c(a,b)
 #endif
+
 #endif
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -395,7 +395,7 @@
 
 void vp8_write_mvprobs(VP8_COMP *cpi)
 {
-    vp8_writer *const w  = & cpi->bc;
+    vp8_writer *const w  = cpi->bc;
     MV_CONTEXT *mvc = cpi->common.fc.mvc;
     int flags[2] = {0, 0};
 #ifdef ENTROPY_STATS
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1436,7 +1436,7 @@
         cpi->mt_sync_range = 16;
 #endif
 
-        vpx_free(cpi->tplist);
+    vpx_free(cpi->tplist);
 
     CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
 }
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -314,8 +314,7 @@
 
     MACROBLOCK mb;
     VP8_COMMON common;
-    vp8_writer bc, bc2;
-    // bool_writer *bc2;
+    vp8_writer bc[9]; // one boolcoder for each partition
 
     VP8_CONFIG oxcf;