shithub: libvpx

Download patch

ref: 97259b460c5f9ca154b81e0b7251a0c81422e4b5
parent: c84d42f86499b0b2af2f57a97ec8ca30529817b5
author: Attila Nagy <attilanagy@google.com>
date: Thu Nov 10 08:07:37 EST 2011

Fix encoder partitioned output on ARM

API was not returning correct partition sizes on arm targets.
The armv5 token packing functions were not storing the information to the
partition size table.
As a fix, have one boolcoder instance allocated for each partition so
that partition sizes are internally available after all partitions
were encoded. This will also allow more flexibility in producing
several partitions in parallel.

Use buffer validation (overflow check) in all ARM bitpacking
functions.

Change-Id: I31c8a11d8a7613676f0ff50928cb2a2ab14fd169

--- a/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -13,6 +13,7 @@
     EXPORT |vp8_encode_bool|
     EXPORT |vp8_stop_encode|
     EXPORT |vp8_encode_value|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -22,6 +23,20 @@
 
     AREA    |.text|, CODE, READONLY
 
+    ; macro for validating write buffer position
+    ; needs vp8_writer in r0
+    ; start shall not be in r1
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]
+    ldr  r3, [r0, #vp8_writer_error]
+    mov  r1, $pos
+    mov  r0, $start
+    bl   vp8_validate_buffer_arm
+    pop  {r0-r3, r12, lr}
+    MEND
+
 ; r0 BOOL_CODER *br
 ; r1 unsigned char *source
 ; r2 unsigned char *source_end
@@ -43,7 +58,7 @@
 ; r1 int bit
 ; r2 int probability
 |vp8_encode_bool| PROC
-    push    {r4-r9, lr}
+    push    {r4-r10, lr}
 
     mov     r4, r2
 
@@ -106,6 +121,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r1, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r9, r1                 ; validate_buffer at pos
+
     strb    r7, [r9, r4]                ; w->buffer[w->pos++]
 
 token_count_lt_zero
@@ -114,7 +132,7 @@
     str     r2, [r0, #vp8_writer_lowvalue]
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
-    pop     {r4-r9, pc}
+    pop     {r4-r10, pc}
     ENDP
 
 ; r0 BOOL_CODER *br
@@ -179,6 +197,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r1, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r9, r1                 ; validate_buffer at pos
+
     strb    r7, [r9, r4]                ; w->buffer[w->pos++]
 
 token_count_lt_zero_se
@@ -198,7 +219,7 @@
 ; r1 int data
 ; r2 int bits
 |vp8_encode_value| PROC
-    push    {r4-r11, lr}
+    push    {r4-r12, lr}
 
     mov     r10, r2
 
@@ -270,6 +291,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r9, r11                ; validate_buffer at pos
+
     strb    r7, [r9, r4]                ; w->buffer[w->pos++]
 
 token_count_lt_zero_ev
@@ -281,7 +305,7 @@
     str     r2, [r0, #vp8_writer_lowvalue]
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
-    pop     {r4-r11, pc}
+    pop     {r4-r12, pc}
     ENDP
 
     END
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -10,6 +10,7 @@
 
 
     EXPORT |vp8cx_pack_tokens_armv5|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -19,6 +20,22 @@
 
     AREA    |.text|, CODE, READONLY
 
+
+    ; macro for validating write buffer position
+    ; needs vp8_writer in r0
+    ; start shall not be in r1
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]
+    ldr  r3, [r0, #vp8_writer_error]
+    mov  r1, $pos
+    mov  r0, $start
+    bl   vp8_validate_buffer_arm
+    pop  {r0-r3, r12, lr}
+    MEND
+
+
 ; r0 vp8_writer *w
 ; r1 const TOKENEXTRA *p
 ; r2 int xcount
@@ -26,11 +43,11 @@
 ; s0 vp8_extra_bits
 ; s1 vp8_coef_tree
 |vp8cx_pack_tokens_armv5| PROC
-    push    {r4-r11, lr}
+    push    {r4-r12, lr}
+    sub     sp, sp, #16
 
     ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
     ;  sizeof (TOKENEXTRA) is 8
-    sub     sp, sp, #12
     add     r2, r1, r2, lsl #3          ; stop = p + xcount*sizeof(TOKENEXTRA)
     str     r2, [sp, #0]
     str     r3, [sp, #8]                ; save vp8_coef_encodings
@@ -57,7 +74,7 @@
     subne   r8, r8, #1                  ; --n
 
     rsb     r4, r8, #32                 ; 32-n
-    ldr     r10, [sp, #52]              ; vp8_coef_tree
+    ldr     r10, [sp, #60]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
     lsl     r12, r6, r4                ; r12 = v << 32 - n
@@ -128,12 +145,15 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
     ; r10 is used earlier in the loop, but r10 is used as
     ; temp variable here.  So after r10 is used, reload
     ; vp8_coef_tree_dcd into r10
-    ldr     r10, [sp, #52]              ; vp8_coef_tree
+    ldr     r10, [sp, #60]              ; vp8_coef_tree
 
 token_count_lt_zero
     lsl     r2, r2, r6                  ; lowvalue <<= shift
@@ -142,7 +162,7 @@
     bne     token_loop
 
     ldrb    r6, [r1, #tokenextra_token] ; t
-    ldr     r7, [sp, #48]               ; vp8_extra_bits
+    ldr     r7, [sp, #56]               ; vp8_extra_bits
     ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
     ;  element.  Here vp8_extra_bit_struct == 16
     add     r12, r7, r6, lsl #4         ; b = vp8_extra_bits + t
@@ -223,6 +243,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
     ldr     r10, [sp, #4]               ; b->tree
 extra_count_lt_zero
@@ -271,7 +294,10 @@
     lsr     r6, r2, #24                 ; lowvalue >> 24
     add     r12, r4, #1                 ; w->pos++
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
-    str     r12, [r0, #0x10]
+    str     r12, [r0, #vp8_writer_pos]
+
+    VALIDATE_POS r7, r12               ; validate_buffer at pos
+
     strb    r6, [r7, r4]
 end_count_zero
 skip_extra_bits
@@ -284,8 +310,8 @@
     str     r2, [r0, #vp8_writer_lowvalue]
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
-    add     sp, sp, #12
-    pop     {r4-r11, pc}
+    add     sp, sp, #16
+    pop     {r4-r12, pc}
     ENDP
 
     END
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -10,6 +10,7 @@
 
 
     EXPORT |vp8cx_pack_mb_row_tokens_armv5|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -19,6 +20,21 @@
 
     AREA    |.text|, CODE, READONLY
 
+
+    ; macro for validating write buffer position
+    ; needs vp8_writer in r0
+    ; start shall not be in r1
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]
+    ldr  r3, [r0, #vp8_writer_error]
+    mov  r1, $pos
+    mov  r0, $start
+    bl   vp8_validate_buffer_arm
+    pop  {r0-r3, r12, lr}
+    MEND
+
 ; r0 VP8_COMP *cpi
 ; r1 vp8_writer *w
 ; r2 vp8_coef_encodings
@@ -26,7 +42,7 @@
 ; s0 vp8_coef_tree
 
 |vp8cx_pack_mb_row_tokens_armv5| PROC
-    push    {r4-r11, lr}
+    push    {r4-r12, lr}
     sub     sp, sp, #24
 
     ; Compute address of cpi->common.mb_rows
@@ -79,7 +95,7 @@
     subne   r8, r8, #1                  ; --n
 
     rsb     r4, r8, #32                 ; 32-n
-    ldr     r10, [sp, #60]              ; vp8_coef_tree
+    ldr     r10, [sp, #64]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
     lsl     r12, r6, r4                 ; r12 = v << 32 - n
@@ -150,12 +166,15 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
     ; r10 is used earlier in the loop, but r10 is used as
     ; temp variable here.  So after r10 is used, reload
     ; vp8_coef_tree_dcd into r10
-    ldr     r10, [sp, #60]              ; vp8_coef_tree
+    ldr     r10, [sp, #64]              ; vp8_coef_tree
 
 token_count_lt_zero
     lsl     r2, r2, r6                  ; lowvalue <<= shift
@@ -245,6 +264,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
     ldr     r10, [sp, #4]               ; b->tree
 extra_count_lt_zero
@@ -293,7 +315,10 @@
     lsr     r6, r2, #24                 ; lowvalue >> 24
     add     r12, r4, #1                 ; w->pos++
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
-    str     r12, [r0, #0x10]
+    str     r12, [r0, #vp8_writer_pos]
+
+    VALIDATE_POS r7, r12               ; validate_buffer at pos
+
     strb    r6, [r7, r4]
 end_count_zero
 skip_extra_bits
@@ -314,7 +339,7 @@
     str     r5, [r0, #vp8_writer_range]
     str     r3, [r0, #vp8_writer_count]
     add     sp, sp, #24
-    pop     {r4-r11, pc}
+    pop     {r4-r12, pc}
     ENDP
 
 _VP8_COMP_common_
--- a/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -10,6 +10,7 @@
 
 
     EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
+    IMPORT |vp8_validate_buffer_arm|
 
     INCLUDE asm_enc_offsets.asm
 
@@ -19,17 +20,31 @@
 
     AREA    |.text|, CODE, READONLY
 
+    ; macro for validating write buffer position
+    ; needs vp8_writer in r0
+    ; start shall not be in r1
+    MACRO
+    VALIDATE_POS $start, $pos
+    push {r0-r3, r12, lr}        ; rest of regs are preserved by subroutine call
+    ldr  r2, [r0, #vp8_writer_buffer_end]
+    ldr  r3, [r0, #vp8_writer_error]
+    mov  r1, $pos
+    mov  r0, $start
+    bl   vp8_validate_buffer_arm
+    pop  {r0-r3, r12, lr}
+    MEND
+
 ; r0 VP8_COMP *cpi
 ; r1 unsigned char *cx_data
-; r2 int num_part
-; r3 *size
+; r2 const unsigned char *cx_data_end
+; r3 int num_part
 ; s0 vp8_coef_encodings
 ; s1 vp8_extra_bits,
-; s2 const vp8_tree_index *,
+; s2 const vp8_tree_index *
 
 |vp8cx_pack_tokens_into_partitions_armv5| PROC
-    push    {r4-r11, lr}
-    sub     sp, sp, #44
+    push    {r4-r12, lr}
+    sub     sp, sp, #40
 
     ; Compute address of cpi->common.mb_rows
     ldr     r4, _VP8_COMP_common_
@@ -39,24 +54,16 @@
     ldr     r5, [r4, r6]                ; load up mb_rows
 
     str     r5, [sp, #36]               ; save mb_rows
-    str     r1, [sp, #24]               ; save cx_data
-    str     r2, [sp, #20]               ; save num_part
-    str     r3, [sp, #8]                ; save *size
+    str     r1, [sp, #24]               ; save ptr = cx_data
+    str     r3, [sp, #20]               ; save num_part
+    str     r2, [sp, #8]                ; save cx_data_end
 
-    ; *size = 3*(num_part -1 );
-    sub     r2, r2, #1                  ; num_part - 1
-    add     r2, r2, r2, lsl #1          ; 3*(num_part - 1)
-    str     r2, [r3]
-
-    add     r2, r2, r1                  ; cx_data + *size
-    str     r2, [sp, #40]               ; ptr
-
     ldr     r4, _VP8_COMP_tplist_
     add     r4, r0, r4
     ldr     r7, [r4, #0]                ; dereference cpi->tp_list
     str     r7, [sp, #32]               ; store start of cpi->tp_list
 
-    ldr     r11, _VP8_COMP_bc2_         ; load up vp8_writer out of cpi
+    ldr     r11, _VP8_COMP_bc_          ; load up vp8_writer out of cpi
     add     r0, r0, r11
 
     mov     r11, #0
@@ -63,7 +70,10 @@
     str     r11, [sp, #28]              ; i
 
 numparts_loop
-    ldr     r10, [sp, #40]              ; ptr
+    ldr     r2, _vp8_writer_sz_         ; load up sizeof(vp8_writer)
+    add     r0, r2                      ; bc[i + 1]
+
+    ldr     r10, [sp, #24]              ; ptr
     ldr     r5,  [sp, #36]              ; move mb_rows to the counting section
     subs    r5, r5, r11                 ; move start point with each partition
                                         ; mb_rows starts at i
@@ -72,6 +82,10 @@
     ; Reset all of the VP8 Writer data for each partition that
     ; is processed.
     ; start_encode
+
+    ldr     r3, [sp, #8]
+    str     r3, [r0, #vp8_writer_buffer_end]
+
     mov     r2, #0                      ; vp8_writer_lowvalue
     mov     r5, #255                    ; vp8_writer_range
     mvn     r3, #23                     ; vp8_writer_count
@@ -182,6 +196,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
     ; r10 is used earlier in the loop, but r10 is used as
@@ -277,6 +294,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
     ldr     r10, [sp, #4]               ; b->tree
 extra_count_lt_zero
@@ -320,12 +340,15 @@
     bne     end_count_zero
 
     ldr     r4, [r0, #vp8_writer_pos]
-    mvn     r3, #7
+    mvn     r3, #7                      ; count = -8
     ldr     r7, [r0, #vp8_writer_buffer]
     lsr     r6, r2, #24                 ; lowvalue >> 24
     add     r12, r4, #1                 ; w->pos++
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
-    str     r12, [r0, #0x10]
+    str     r12, [r0, #vp8_writer_pos]
+
+    VALIDATE_POS r7, r12                ; validate_buffer at pos
+
     strb    r6, [r7, r4]
 end_count_zero
 skip_extra_bits
@@ -401,6 +424,9 @@
     bic     r2, r2, #0xff000000         ; lowvalue &= 0xffffff
     str     r11, [r0, #vp8_writer_pos]
     sub     r3, r3, #8                  ; count -= 8
+
+    VALIDATE_POS r10, r11               ; validate_buffer at pos
+
     strb    r7, [r10, r4]               ; w->buffer[w->pos++]
 
 token_count_lt_zero_se
@@ -409,34 +435,11 @@
     subs    r12, r12, #1
     bne     stop_encode_loop
 
-    ldr     r10, [sp, #8]               ; *size
-    ldr     r11, [r10]
     ldr     r4,  [r0, #vp8_writer_pos]  ; w->pos
-    add     r11, r11, r4                ; *size += w->pos
-    str     r11, [r10]
-
-    ldr     r9, [sp, #20]               ; num_parts
-    sub     r9, r9, #1
-    ldr     r10, [sp, #28]              ; i
-    cmp     r10, r9                     ; if(i<(num_part - 1))
-    bge     skip_write_partition
-
-    ldr     r12, [sp, #40]              ; ptr
+    ldr     r12, [sp, #24]              ; ptr
     add     r12, r12, r4                ; ptr += w->pos
-    str     r12, [sp, #40]
+    str     r12, [sp, #24]
 
-    ldr     r9, [sp, #24]               ; cx_data
-    mov     r8, r4, asr #8
-    strb    r4, [r9, #0]
-    strb    r8, [r9, #1]
-    mov     r4, r4, asr #16
-    strb    r4, [r9, #2]
-
-    add     r9, r9, #3                  ; cx_data += 3
-    str     r9, [sp, #24]
-
-skip_write_partition
-
     ldr     r11, [sp, #28]              ; i
     ldr     r10, [sp, #20]              ; num_parts
 
@@ -451,9 +454,8 @@
     cmp     r10, r11
     bgt     numparts_loop
 
-
-    add     sp, sp, #44
-    pop     {r4-r11, pc}
+    add     sp, sp, #40
+    pop     {r4-r12, pc}
     ENDP
 
 _VP8_COMP_common_
@@ -462,7 +464,9 @@
     DCD     vp8_common_mb_rows
 _VP8_COMP_tplist_
     DCD     vp8_comp_tplist
-_VP8_COMP_bc2_
-    DCD     vp8_comp_bc2
+_VP8_COMP_bc_
+    DCD     vp8_comp_bc
+_vp8_writer_sz_
+    DCD     vp8_writer_sz
 
     END
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -10,7 +10,7 @@
 
 
 #include "vp8/encoder/boolhuff.h"
-#include "vp8/common/blockd.h"
+#include "vpx/internal/vpx_codec_internal.h"
 
 const unsigned int vp8_prob_cost[256] =
 {
@@ -32,3 +32,10 @@
     22,   21,   19,   18,   16,   15,   13,   12,   10,    9,    7,    6,    4,    3,    1,   1
 };
 
+int vp8_validate_buffer_arm(const unsigned char *start,
+                            size_t               len,
+                            const unsigned char *end,
+                            struct vpx_internal_error_info *error)
+{
+    return validate_buffer(start, len, end, error);
+}
--- a/vp8/encoder/asm_enc_offsets.c
+++ b/vp8/encoder/asm_enc_offsets.c
@@ -50,6 +50,7 @@
 DEFINE(vp8_writer_pos,                          offsetof(vp8_writer, pos));
 DEFINE(vp8_writer_buffer,                       offsetof(vp8_writer, buffer));
 DEFINE(vp8_writer_buffer_end,                   offsetof(vp8_writer, buffer_end));
+DEFINE(vp8_writer_error,                        offsetof(vp8_writer, error));
 
 DEFINE(tokenextra_token,                        offsetof(TOKENEXTRA, Token));
 DEFINE(tokenextra_extra,                        offsetof(TOKENEXTRA, Extra));
@@ -69,7 +70,8 @@
 
 DEFINE(vp8_comp_tplist,                         offsetof(VP8_COMP, tplist));
 DEFINE(vp8_comp_common,                         offsetof(VP8_COMP, common));
-DEFINE(vp8_comp_bc2,                            offsetof(VP8_COMP, bc2));
+DEFINE(vp8_comp_bc ,                            offsetof(VP8_COMP, bc));
+DEFINE(vp8_writer_sz ,                          sizeof(vp8_writer));
 
 DEFINE(tokenlist_start,                         offsetof(TOKENLIST, start));
 DEFINE(tokenlist_stop,                          offsetof(TOKENLIST, stop));
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -109,7 +109,7 @@
 {
     VP8_COMMON *const x = & cpi->common;
 
-    vp8_writer *const w = & cpi->bc;
+    vp8_writer *const w = cpi->bc;
 
     {
         vp8_prob Pnew   [VP8_YMODES-1];
@@ -374,7 +374,9 @@
 
 }
 
-static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data, unsigned char * cx_data_end, int num_part, int *size)
+static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
+                                          unsigned char * cx_data_end,
+                                          int num_part)
 {
 
     int i;
@@ -381,13 +383,12 @@
     unsigned char *ptr = cx_data;
     unsigned char *ptr_end = cx_data_end;
     unsigned int shift;
-    vp8_writer *w = &cpi->bc2;
-    *size = 3 * (num_part - 1);
-    cpi->partition_sz[0] += *size;
-    ptr = cx_data + (*size);
+    vp8_writer *w;
+    ptr = cx_data;
 
     for (i = 0; i < num_part; i++)
     {
+        w = cpi->bc + i + 1;
         vp8_start_encode(w, ptr, ptr_end);
         {
             unsigned int split;
@@ -597,17 +598,7 @@
         }
 
         vp8_stop_encode(w);
-        *size +=   w->pos;
-
-        /* The first partition size is set earlier */
-        cpi->partition_sz[i + 1] = w->pos;
-
-        if (i < (num_part - 1))
-        {
-            write_partition_size(cx_data, w->pos);
-            cx_data += 3;
-            ptr += w->pos;
-        }
+        ptr += w->pos;
     }
 }
 
@@ -892,7 +883,7 @@
 static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 {
     VP8_COMMON *const pc = & cpi->common;
-    vp8_writer *const w = & cpi->bc;
+    vp8_writer *const w = cpi->bc;
     const MV_CONTEXT *mvc = pc->fc.mvc;
 
     const int *const rfct = cpi->count_mb_ref_frame_usage;
@@ -1107,7 +1098,7 @@
 
 static void write_kfmodes(VP8_COMP *cpi)
 {
-    vp8_writer *const bc = & cpi->bc;
+    vp8_writer *const bc = cpi->bc;
     const VP8_COMMON *const c = & cpi->common;
     /* const */
     MODE_INFO *m = c->mi;
@@ -1437,7 +1428,7 @@
 static void update_coef_probs(VP8_COMP *cpi)
 {
     int i = 0;
-    vp8_writer *const w = & cpi->bc;
+    vp8_writer *const w = cpi->bc;
     int savings = 0;
 
     vp8_clear_system_state(); //__asm emms;
@@ -1583,7 +1574,7 @@
     int i, j;
     VP8_HEADER oh;
     VP8_COMMON *const pc = & cpi->common;
-    vp8_writer *const bc = & cpi->bc;
+    vp8_writer *const bc = cpi->bc;
     MACROBLOCKD *const xd = & cpi->mb.e_mbd;
     int extra_bytes_packed = 0;
 
@@ -1598,8 +1589,7 @@
 
     mb_feature_data_bits = vp8_mb_feature_data_bits;
 
-    cpi->bc.error = &pc->error;
-    cpi->bc2.error = &pc->error;
+    bc[0].error = &pc->error;
 
     validate_buffer(cx_data, 3, cx_data_end, &cpi->common.error);
     cx_data += 3;
@@ -1879,8 +1869,10 @@
 
     vp8_stop_encode(bc);
 
-    oh.first_partition_length_in_bytes = cpi->bc.pos;
+    cx_data += bc->pos;
 
+    oh.first_partition_length_in_bytes = cpi->bc->pos;
+
     /* update frame tag */
     {
         int v = (oh.first_partition_length_in_bytes << 5) |
@@ -1893,34 +1885,58 @@
         dest[2] = v >> 16;
     }
 
-    *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc.pos;
+    *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc->pos;
+
     cpi->partition_sz[0] = *size;
 
     if (pc->multi_token_partition != ONE_PARTITION)
     {
-        int num_part;
-        int asize;
-        num_part = 1 << pc->multi_token_partition;
+        int num_part = 1 << pc->multi_token_partition;
 
-        pack_tokens_into_partitions(cpi, cx_data + bc->pos, cx_data_end, num_part, &asize);
+        /* partition size table at the end of first partition */
+        cpi->partition_sz[0] += 3 * (num_part - 1);
+        *size += 3 * (num_part - 1);
 
-        *size += asize;
+        validate_buffer(cx_data, 3 * (num_part - 1), cx_data_end,
+                        &pc->error);
+
+        for(i = 1; i < num_part + 1; i++)
+        {
+            cpi->bc[i].error = &pc->error;
+        }
+
+        pack_tokens_into_partitions(cpi, cx_data + 3 * (num_part - 1),
+                                    cx_data_end, num_part);
+
+        for(i = 1; i < num_part; i++)
+        {
+            cpi->partition_sz[i] = cpi->bc[i].pos;
+            write_partition_size(cx_data, cpi->partition_sz[i]);
+            cx_data += 3;
+            *size += cpi->partition_sz[i]; /* add to total */
+        }
+
+        /* add last partition to total size */
+        cpi->partition_sz[i] = cpi->bc[i].pos;
+        *size += cpi->partition_sz[i];
     }
     else
     {
-        vp8_start_encode(&cpi->bc2, cx_data + bc->pos, cx_data_end);
+        bc[1].error = &pc->error;
 
+        vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);
+
 #if CONFIG_MULTITHREAD
         if (cpi->b_multi_threaded)
-            pack_mb_row_tokens(cpi, &cpi->bc2);
+            pack_mb_row_tokens(cpi, &cpi->bc[1]);
         else
 #endif
-            pack_tokens(&cpi->bc2, cpi->tok, cpi->tok_count);
+            pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
 
-        vp8_stop_encode(&cpi->bc2);
+        vp8_stop_encode(&cpi->bc[1]);
 
-        *size += cpi->bc2.pos;
-        cpi->partition_sz[1] = cpi->bc2.pos;
+        *size += cpi->bc[1].pos;
+        cpi->partition_sz[1] = cpi->bc[1].pos;
     }
 }
 
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -17,10 +17,13 @@
                              vp8_token *,
                              vp8_extra_bit_struct *,
                              const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
-        vp8_token *,
-        vp8_extra_bit_struct *,
-        const vp8_tree_index *);
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
+                                             unsigned char * cx_data,
+                                             const unsigned char *cx_data_end,
+                                             int num_parts,
+                                             vp8_token *,
+                                             vp8_extra_bit_struct *,
+                                             const vp8_tree_index *);
 void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
                                     vp8_token *,
                                     vp8_extra_bit_struct *,
@@ -27,13 +30,14 @@
                                     const vp8_tree_index *);
 # define pack_tokens(a,b,c)                  \
     vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
-# define pack_tokens_into_partitions(a,b,unused,c,d)  \
+# define pack_tokens_into_partitions(a,b,c,d)  \
     vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 # define pack_mb_row_tokens(a,b)               \
     vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 #else
-# define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
-# define pack_tokens_into_partitions(a,b,c,d,e)  pack_tokens_into_partitions_c(a,b,c,d,e)
+# define pack_tokens(a,b,c)                    pack_tokens_c(a,b,c)
+# define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
 # define pack_mb_row_tokens(a,b)               pack_mb_row_tokens_c(a,b)
 #endif
+
 #endif
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -395,7 +395,7 @@
 
 void vp8_write_mvprobs(VP8_COMP *cpi)
 {
-    vp8_writer *const w  = & cpi->bc;
+    vp8_writer *const w  = cpi->bc;
     MV_CONTEXT *mvc = cpi->common.fc.mvc;
     int flags[2] = {0, 0};
 #ifdef ENTROPY_STATS
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1436,7 +1436,7 @@
         cpi->mt_sync_range = 16;
 #endif
 
-        vpx_free(cpi->tplist);
+    vpx_free(cpi->tplist);
 
     CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));
 }
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -314,8 +314,7 @@
 
     MACROBLOCK mb;
     VP8_COMMON common;
-    vp8_writer bc, bc2;
-    // bool_writer *bc2;
+    vp8_writer bc[9]; // one boolcoder for each partition
 
     VP8_CONFIG oxcf;