shithub: libvpx

Download patch

ref: e44ee38aef85e48601e54a13939f2a299bb583e0
parent: 0e734a63e87c9f87a5ed3dc8c2c698f0cdb1dad7
author: Hui Su <huisu@google.com>
date: Wed Jun 13 15:03:31 EDT 2012

Add lossless compression mode.

This commit adds lossless compression capability to the experimental
branch. The lossless experiment can be enabled by passing
--enable-lossless to configure. When the experiment is enabled, the
encoder uses lossless compression mode if the command-line option
--lossless is given, and the decoder automatically recognizes a
losslessly encoded clip and decodes it accordingly.

To achieve lossless coding, this commit changes the following:
    1. To encode in lossless mode, the encoder forces the use of the unit
quantizer, i.e., Q 0, where the effective quantization is 1. The encoder
also disables the 8x8 transform and allows only the 4x4 transform;
    2. At Q 0, the first-order 4x4 DCT/IDCT pair is replaced by a
pair of forward and inverse Walsh-Hadamard transforms
(http://goo.gl/EIsfy), with scaling applied to match the range
of the original 4x4 DCT/IDCT pair;
    3. At Q 0, the second-order transform continues to use the previous
Walsh-Hadamard transform pair. However, to keep the second-order
transform reversible at Q 0, the first-order DC coefficients are
scaled down prior to the forward transform, and the second-order
output is scaled up prior to quantization. Symmetric downscaling and
upscaling are added around the inverse second-order
transform;
    4. In lossless mode, the encoder also disables a number of minor
features to ensure no loss is introduced. These features include:
        a. Trellis quantization optimization
        b. Loop filtering
        c. Aggressive zero-binning, rounding and zero-bin boosting
        d. Mode based zero-bin boosting

A lossless coding test was performed on all clips in the derf set
to verify that the commit achieves lossless compression for all of
them. The average compression ratio is around 2.57 to 1.
(http://goo.gl/dEShs)

Change-Id: Ia3aba7dd09df40dd590f93b9aba134defbc64e34

--- a/configure
+++ b/configure
@@ -232,6 +232,7 @@
     newintramodes
     adaptive_entropy
     pred_filter
+    lossless
 "
 CONFIG_LIST="
     external_build
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -31,6 +31,10 @@
 #include "arm/idct_arm.h"
 #endif
 
+#if CONFIG_LOSSLESS
+#define WHT_UPSCALE_FACTOR 3
+#define Y2_WHT_UPSCALE_FACTOR 2
+#endif
 
 #ifndef vp8_idct_idct8
 #define vp8_idct_idct8 vp8_short_idct8x8_c
@@ -84,6 +88,14 @@
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_c
 #endif
 extern prototype_second_order(vp8_idct_iwalsh16);
+
+#if CONFIG_LOSSLESS
+extern prototype_idct(vp8_short_inv_walsh4x4_x8_c);
+extern prototype_idct(vp8_short_inv_walsh4x4_1_x8_c);
+extern prototype_idct_scalar_add(vp8_dc_only_inv_walsh_add_c);
+extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c);
+extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
+#endif
 
 typedef prototype_idct((*vp8_idct_fn_t));
 typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -23,6 +23,7 @@
  *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
  **************************************************************************/
 #include "vpx_ports/config.h"
+#include "vp8/common/idct.h"
 
 
 #include <math.h>
@@ -195,6 +196,163 @@
     }
 }
 
+#if CONFIG_LOSSLESS
+void vp8_short_inv_walsh4x4_lossless_c(short *input, short *output)
+{
+    int i;
+    int a1, b1, c1, d1;
+    short *ip = input;
+    short *op = output;
+    /* Lossless-mode inverse 4x4 Walsh-Hadamard transform: reads 16 shorts from input, writes 16 to output. */
+    for (i = 0; i < 4; i++)  /* pass 1: one row of four coefficients per iteration */
+    {
+        a1 = ((ip[0] + ip[3]))>>Y2_WHT_UPSCALE_FACTOR;
+        b1 = ((ip[1] + ip[2]))>>Y2_WHT_UPSCALE_FACTOR;
+        c1 = ((ip[1] - ip[2]))>>Y2_WHT_UPSCALE_FACTOR;
+        d1 = ((ip[0] - ip[3]))>>Y2_WHT_UPSCALE_FACTOR;
+        /* The sums/differences above are shifted down by Y2_WHT_UPSCALE_FACTOR; only op[0] gets the +1 rounding term. */
+        op[0] = (a1 + b1 + 1)>>1;
+        op[1] = (c1 + d1)>>1;
+        op[2] = (a1 - b1)>>1;
+        op[3] = (d1 - c1)>>1;
+
+        ip += 4;
+        op += 4;
+    }
+    /* Pass 2: transform the columns of the pass-1 result in place (elements 4 apart). */
+    ip = output;
+    op = output;
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[12];
+        b1 = ip[4] + ip[8];
+        c1 = ip[4] - ip[8];
+        d1 = ip[0] - ip[12];
+
+        /* Column results are shifted back up by Y2_WHT_UPSCALE_FACTOR before being stored. */
+        op[0] = ((a1 + b1 + 1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[4] = ((c1 + d1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[8] = ((a1 - b1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[12]= ((d1 - c1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+
+        ip++;
+        op++;
+    }
+}
+
+void vp8_short_inv_walsh4x4_1_lossless_c(short *in, short *out)
+{
+    int i;
+    short tmp[4];
+    short *ip = in;
+    short *op = tmp;
+    /* DC-only variant: only in[0] is read; tmp[] receives the four values of the first pass. */
+    op[0] =((ip[0]>>Y2_WHT_UPSCALE_FACTOR)+ 1)>>1;
+    op[1] = op[2] = op[3] = ((ip[0]>>Y2_WHT_UPSCALE_FACTOR)>>1);
+    /* Second pass: expand each tmp[] value down one column of out (elements 4 apart), shifting back up. */
+    ip = tmp;
+    op = out;
+    for(i = 0; i<4; i++)
+    {
+        op[0] =((ip[0]+ 1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[4] = op[8] = op[12] = ((ip[0]>>1))<<Y2_WHT_UPSCALE_FACTOR;
+        ip ++;
+        op ++;
+    }
+}
+
+void vp8_short_inv_walsh4x4_x8_c(short *input, short *output, int pitch)
+{
+    int i;
+    int a1, b1, c1, d1;
+    short *ip = input;
+    short *op = output;
+    int shortpitch = pitch >> 1;  /* row spacing of output in shorts; callers in this patch pass pitch = 4 << 1 */
+    /* Inverse 4x4 Walsh-Hadamard transform: 16 contiguous input shorts -> 4 output rows, shortpitch apart. */
+    for (i = 0; i < 4; i++)  /* pass 1: one input row per iteration */
+    {
+        a1 = ((ip[0] + ip[3]))>>WHT_UPSCALE_FACTOR;
+        b1 = ((ip[1] + ip[2]))>>WHT_UPSCALE_FACTOR;
+        c1 = ((ip[1] - ip[2]))>>WHT_UPSCALE_FACTOR;
+        d1 = ((ip[0] - ip[3]))>>WHT_UPSCALE_FACTOR;
+        /* The sums/differences above are shifted down by WHT_UPSCALE_FACTOR; only op[0] gets the +1 rounding term. */
+        op[0] = (a1 + b1 + 1)>>1;
+        op[1] = (c1 + d1)>>1;
+        op[2] = (a1 - b1)>>1;
+        op[3] = (d1 - c1)>>1;
+
+        ip += 4;
+        op += shortpitch;
+    }
+    /* Pass 2: transform each column of output in place; no shift back up is applied here. */
+    ip = output;
+    op = output;
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[shortpitch*0] + ip[shortpitch*3];
+        b1 = ip[shortpitch*1] + ip[shortpitch*2];
+        c1 = ip[shortpitch*1] - ip[shortpitch*2];
+        d1 = ip[shortpitch*0] - ip[shortpitch*3];
+
+
+        op[shortpitch*0] = (a1 + b1 + 1)>>1;
+        op[shortpitch*1] = (c1 + d1)>>1;
+        op[shortpitch*2] = (a1 - b1)>>1;
+        op[shortpitch*3] = (d1 - c1)>>1;
+
+        ip++;
+        op++;
+    }
+}
+
+void vp8_short_inv_walsh4x4_1_x8_c(short *in, short *out, int pitch)
+{
+    int i;
+    short tmp[4];
+    short *ip = in;
+    short *op = tmp;
+    int shortpitch = pitch >> 1;  /* row spacing of out in shorts */
+    /* DC-only inverse transform: only in[0] is read, shifted down by WHT_UPSCALE_FACTOR first. */
+    op[0] =((ip[0]>>WHT_UPSCALE_FACTOR) + 1)>>1;
+    op[1] = op[2] = op[3] = ((ip[0]>>WHT_UPSCALE_FACTOR)>>1);
+
+    /* Second pass: tmp[i] fills column i of out; row 0 gets the +1 rounding term, rows 1..3 do not. */
+    ip = tmp;
+    op = out;
+    for(i = 0; i<4; i++)
+    {
+        op[shortpitch*0] =(ip[0]+ 1)>>1;
+        op[shortpitch*1] = op[shortpitch*2] = op[shortpitch*3] = ip[0]>>1;
+        ip ++;
+        op ++;
+    }
+}
+
+void vp8_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+{
+    int r, c;
+    short tmp[16];  /* 4x4 residual produced from input_dc by the DC-only inverse transform */
+    vp8_short_inv_walsh4x4_1_x8_c( &input_dc, tmp, 4<<1);
+    /* Add the residual to the prediction, clamp to 0..255 and store to dst_ptr, one row at a time. */
+    for (r = 0; r < 4; r++)
+        {
+          for (c = 0; c < 4; c++)
+            {
+                int a = tmp[r*4 + c] + pred_ptr[c] ;
+                if (a < 0)
+                    a = 0;
+
+                if (a > 255)
+                    a = 255;
+
+                dst_ptr[c] = (unsigned char) a ;
+            }
+            /* pred_ptr rows are pitch bytes apart, dst_ptr rows are stride bytes apart. */
+            dst_ptr += stride;
+            pred_ptr += pitch;
+        }
+}
+#endif
 
 void vp8_dc_only_idct_add_8x8_c(short input_dc,
                                 unsigned char *pred_ptr,
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -153,6 +153,7 @@
         int worst_allowed_q;
         int best_allowed_q;
         int cq_level;
+        int lossless;
 
         // two pass datarate control
         int two_pass_vbrbias;        // two pass datarate control tweaks
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -140,6 +140,35 @@
         xd->block[i].dequant = pc->Y1dequant[QIndex];
     }
 
+#if CONFIG_LOSSLESS
+    if(!QIndex)  /* Q index 0: install the lossless (Walsh-Hadamard based) inverse transforms and dequant/add helpers */
+    {
+      pbi->common.rtcd.idct.idct1        = vp8_short_inv_walsh4x4_1_x8_c;
+      pbi->common.rtcd.idct.idct16       = vp8_short_inv_walsh4x4_x8_c;
+      pbi->common.rtcd.idct.idct1_scalar_add  = vp8_dc_only_inv_walsh_add_c;
+      pbi->common.rtcd.idct.iwalsh1      = vp8_short_inv_walsh4x4_1_lossless_c;
+      pbi->common.rtcd.idct.iwalsh16     = vp8_short_inv_walsh4x4_lossless_c;
+      pbi->dequant.idct_add            = vp8_dequant_idct_add_lossless_c;
+      pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_lossless_c;
+      pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_lossless_c;
+      pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_lossless_c;
+      pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_lossless_c;
+    }
+    else  /* any other Q index: point the same slots at the standard _c implementations */
+    {
+      pbi->common.rtcd.idct.idct1        = vp8_short_idct4x4llm_1_c;
+      pbi->common.rtcd.idct.idct16       = vp8_short_idct4x4llm_c;
+      pbi->common.rtcd.idct.idct1_scalar_add  = vp8_dc_only_idct_add_c;
+      pbi->common.rtcd.idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
+      pbi->common.rtcd.idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
+      pbi->dequant.idct_add            = vp8_dequant_idct_add_c;
+      pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_c;
+      pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
+      pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_c;
+      pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
+    }
+#endif
+
     for (i = 16; i < 24; i++)
     {
         xd->block[i].dequant = pc->UVdequant[QIndex];
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -20,6 +20,11 @@
 extern void vp8_short_idct8x8_c(short *input, short *output, int pitch);
 extern void vp8_short_idct8x8_1_c(short *input, short *output, int pitch);
 
+#if CONFIG_LOSSLESS
+extern void vp8_short_inv_walsh4x4_x8_c(short *input, short *output, int pitch);
+extern void vp8_short_inv_walsh4x4_1_x8_c(short *input, short *output, int pitch);
+#endif
+
 #ifdef DEC_DEBUG
 extern int dec_debug;
 #endif
@@ -118,6 +123,86 @@
         pred += pitch;
     }
 }
+
+#if CONFIG_LOSSLESS
+void vp8_dequant_idct_add_lossless_c(short *input, short *dq, unsigned char *pred,
+                            unsigned char *dest, int pitch, int stride)
+{
+    short output[16];
+    short *diff_ptr = output;
+    int r, c;
+    int i;
+    /* Dequantize the 16 coefficients in place: input[i] *= dq[i]. */
+    for (i = 0; i < 16; i++)
+    {
+        input[i] = dq[i] * input[i];
+    }
+    /* Inverse Walsh-Hadamard transform into a packed 4x4 buffer (pitch 4 << 1). */
+    vp8_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
+    /* Zero 32 bytes of input (all 16 coefficients when short is 2 bytes) now that they are consumed. */
+    vpx_memset(input, 0, 32);
+    /* Add the residual to pred, clamp to 0..255 and write to dest, one row at a time. */
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred[c];
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dest[c] = (unsigned char) a;
+        }
+        /* dest rows are stride bytes apart, pred rows pitch bytes apart, residual rows 4 shorts apart. */
+        dest += stride;
+        diff_ptr += 4;
+        pred += pitch;
+    }
+}
+
+void vp8_dequant_dc_idct_add_lossless_c(short *input, short *dq, unsigned char *pred,
+                               unsigned char *dest, int pitch, int stride,
+                               int Dc)
+{
+    int i;
+    short output[16];
+    short *diff_ptr = output;
+    int r, c;
+    /* Coefficient 0 is taken from Dc as-is (not multiplied by dq); coefficients 1..15 are dequantized in place. */
+    input[0] = (short)Dc;
+
+    for (i = 1; i < 16; i++)
+    {
+        input[i] = dq[i] * input[i];
+    }
+    /* Inverse transform into a packed 4x4 buffer, then zero 32 bytes of input. */
+    vp8_short_inv_walsh4x4_x8_c(input, output, 4 << 1);
+    vpx_memset(input, 0, 32);
+    /* Add the residual to pred, clamp to 0..255 and write to dest, one row at a time. */
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred[c];
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dest[c] = (unsigned char) a;
+        }
+        /* dest rows are stride bytes apart, pred rows pitch bytes apart, residual rows 4 shorts apart. */
+        dest += stride;
+        diff_ptr += 4;
+        pred += pitch;
+    }
+}
+#endif
 
 void vp8_dequantize_b_2x2_c(BLOCKD *d)
 {
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -96,6 +96,13 @@
 #endif
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
 
+#if CONFIG_LOSSLESS
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_lossless_c);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_lossless_c);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_lossless_c);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_lossless_c);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_lossless_c);
+#endif
 
 #ifndef vp8_dequant_block_2x2
 #define vp8_dequant_block_2x2 vp8_dequantize_b_2x2_c
--- a/vp8/decoder/idct_blk.c
+++ b/vp8/decoder/idct_blk.c
@@ -19,6 +19,12 @@
                             unsigned char *dest, int pitch, int stride);
 void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
                             unsigned char *dst_ptr, int pitch, int stride);
+#if CONFIG_LOSSLESS  /* lossless-mode helpers called by the *_lossless_c block functions in this file */
+void vp8_dequant_idct_add_lossless_c(short *input, short *dq, unsigned char *pred,
+                            unsigned char *dest, int pitch, int stride);
+void vp8_dc_only_inv_walsh_add_c(short input_dc, unsigned char *pred_ptr,
+                            unsigned char *dst_ptr, int pitch, int stride);
+#endif
 
 void vp8_dequant_dc_idct_add_y_block_c
             (short *q, short *dq, unsigned char *pre,
@@ -163,4 +169,109 @@
 
   vp8_dequant_idct_add_8x8_c (q, dq, pre, dstv, 8, stride);
 }
+
+#if CONFIG_LOSSLESS
+void vp8_dequant_dc_idct_add_y_block_lossless_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i, j;
+    /* Reconstruct a 4x4 grid of 4x4 blocks; each block's DC value comes from dc[] rather than from q. */
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)  /* eobs entry > 1: full dequant + inverse transform; otherwise DC-only path */
+                vp8_dequant_dc_idct_add_lossless_c (q, dq, pre, dst, 16, stride, dc[0]);
+            else
+                vp8_dc_only_inv_walsh_add_c(dc[0], pre, dst, 16, stride);
+            /* Next block: 16 coefficients further in q, 4 pixels to the right in pre and dst. */
+            q   += 16;
+            pre += 4;
+            dst += 4;
+            dc  ++;
+        }
+        /* Next row of blocks: undo the 16-pixel horizontal advance and move down 4 lines (pre pitch is 16). */
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_lossless_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i, j;
+    /* Reconstruct a 4x4 grid of 4x4 blocks; each block's DC value is q[0], dequantized by dq[0]. */
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)  /* eobs entry > 1: full dequant + inverse transform; otherwise DC-only path */
+                vp8_dequant_idct_add_lossless_c (q, dq, pre, dst, 16, stride);
+            else
+            {
+                vp8_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dst, 16, stride);
+                ((int *)q)[0] = 0;  /* clears q[0] (and q[1] as well when int spans two shorts) */
+            }
+            /* Next block: 16 coefficients further in q, 4 pixels to the right in pre and dst. */
+            q   += 16;
+            pre += 4;
+            dst += 4;
+        }
+        /* Next row of blocks: undo the 16-pixel horizontal advance and move down 4 lines (pre pitch is 16). */
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_lossless_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i, j;
+    /* The first 2x2 grid of 4x4 blocks is written to dstu, the second to dstv; q, pre and eobs run on across both. */
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)  /* eobs entry > 1: full dequant + inverse transform; otherwise DC-only path */
+                vp8_dequant_idct_add_lossless_c (q, dq, pre, dstu, 8, stride);
+            else
+            {
+                vp8_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstu, 8, stride);
+                ((int *)q)[0] = 0;  /* clears q[0] (and q[1] as well when int spans two shorts) */
+            }
+
+            q    += 16;
+            pre  += 4;
+            dstu += 4;
+        }
+        /* Next row of blocks: undo the 8-pixel horizontal advance and move down 4 lines (pre pitch is 8). */
+        pre  += 32 - 8;
+        dstu += 4*stride - 8;
+    }
+    /* Same procedure for the four blocks written to dstv. */
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_lossless_c (q, dq, pre, dstv, 8, stride);
+            else
+            {
+                vp8_dc_only_inv_walsh_add_c(q[0]*dq[0], pre, dstv, 8, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q    += 16;
+            pre  += 4;
+            dstv += 4;
+        }
+
+        pre  += 32 - 8;
+        dstv += 4*stride - 8;
+    }
+}
+#endif
 
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -11,6 +11,7 @@
 
 #include <math.h>
 #include "vpx_ports/config.h"
+#include "vp8/common/idct.h"
 
 #if CONFIG_INT_8X8FDCT
 
@@ -457,4 +458,98 @@
         ip += 4;
         op += 4;
     }
-}
\ No newline at end of file
+}
+
+#if CONFIG_LOSSLESS
+void vp8_short_walsh4x4_lossless_c(short *input, short *output, int pitch)
+{
+    int i;
+    int a1, b1, c1, d1;
+    short *ip = input;
+    short *op = output;
+    int pitch_short = pitch >>1;  /* row spacing of input in shorts */
+    /* Lossless-mode forward 4x4 Walsh-Hadamard transform: strided input block -> 16 contiguous output shorts. */
+    for (i = 0; i < 4; i++)  /* pass 1: one input column per iteration */
+    {
+        a1 = (ip[0 * pitch_short] + ip[3 * pitch_short])>>Y2_WHT_UPSCALE_FACTOR;
+        b1 = (ip[1 * pitch_short] + ip[2 * pitch_short])>>Y2_WHT_UPSCALE_FACTOR;
+        c1 = (ip[1 * pitch_short] - ip[2 * pitch_short])>>Y2_WHT_UPSCALE_FACTOR;
+        d1 = (ip[0 * pitch_short] - ip[3 * pitch_short])>>Y2_WHT_UPSCALE_FACTOR;
+        /* The sums/differences above are shifted down by Y2_WHT_UPSCALE_FACTOR; results go down one output column. */
+        op[0] = (a1 + b1 + 1)>>1;
+        op[4] = (c1 + d1)>>1;
+        op[8] = (a1 - b1)>>1;
+        op[12]= (d1 - c1)>>1;
+
+        ip++;
+        op++;
+    }
+    ip = output;
+    op = output;
+    /* Pass 2: each row of the pass-1 result, in place; results are shifted up by Y2_WHT_UPSCALE_FACTOR. */
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[3];
+        b1 = ip[1] + ip[2];
+        c1 = ip[1] - ip[2];
+        d1 = ip[0] - ip[3];
+
+        op[0] = ((a1 + b1 + 1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[1] = ((c1 + d1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[2] = ((a1 - b1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+        op[3] = ((d1 - c1)>>1)<<Y2_WHT_UPSCALE_FACTOR;
+
+        ip += 4;
+        op += 4;
+    }
+}
+
+void vp8_short_walsh4x4_x8_c(short *input, short *output, int pitch)
+{
+    int i;
+    int a1, b1, c1, d1;
+    short *ip = input;
+    short *op = output;
+    int pitch_short = pitch >>1;  /* row spacing of input in shorts */
+    /* Forward 4x4 Walsh-Hadamard transform whose output is shifted up by WHT_UPSCALE_FACTOR. */
+    for (i = 0; i < 4; i++)  /* pass 1: one input column per iteration, no scaling */
+    {
+        a1 = ip[0 * pitch_short] + ip[3 * pitch_short];
+        b1 = ip[1 * pitch_short] + ip[2 * pitch_short];
+        c1 = ip[1 * pitch_short] - ip[2 * pitch_short];
+        d1 = ip[0 * pitch_short] - ip[3 * pitch_short];
+        /* Results go down one column of the contiguous 4x4 output; only op[0] gets the +1 rounding term. */
+        op[0] = (a1 + b1 +1)>>1;
+        op[4] = (c1 + d1)>>1;
+        op[8] = (a1 - b1)>>1;
+        op[12]= (d1 - c1)>>1;
+
+        ip++;
+        op++;
+    }
+    ip = output;
+    op = output;
+    /* Pass 2: each row of the pass-1 result, in place; results are shifted up by WHT_UPSCALE_FACTOR. */
+    for (i = 0; i < 4; i++)
+    {
+        a1 = ip[0] + ip[3];
+        b1 = ip[1] + ip[2];
+        c1 = ip[1] - ip[2];
+        d1 = ip[0] - ip[3];
+
+        op[0] = ((a1 + b1 +1)>>1)<<WHT_UPSCALE_FACTOR;
+        op[1] = ((c1 + d1)>>1)<<WHT_UPSCALE_FACTOR;
+        op[2] = ((a1 - b1)>>1)<<WHT_UPSCALE_FACTOR;
+        op[3] = ((d1 - c1)>>1)<<WHT_UPSCALE_FACTOR;
+
+        ip += 4;
+        op += 4;
+    }
+}
+
+void vp8_short_walsh8x4_x8_c(short *input, short *output, int pitch)
+{ /* Transforms two horizontally adjacent 4x4 blocks with vp8_short_walsh4x4_x8_c. */
+  vp8_short_walsh4x4_x8_c(input,   output,    pitch);  /* first block -> output[0..15] */
+  vp8_short_walsh4x4_x8_c(input + 4, output + 16, pitch);  /* block 4 columns to the right -> output[16..31] */
+}
+#endif
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -59,6 +59,12 @@
 #endif
 extern prototype_fdct(vp8_fdct_walsh_short4x4);
 
+#if CONFIG_LOSSLESS
+extern prototype_fdct(vp8_short_walsh4x4_x8_c);
+extern prototype_fdct(vp8_short_walsh8x4_x8_c);
+extern prototype_fdct(vp8_short_walsh4x4_lossless_c);
+#endif
+
 typedef prototype_fdct(*vp8_fdct_fn_t);
 typedef struct
 {
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -727,7 +727,11 @@
     sf->quarter_pixel_search = 1;
     sf->half_pixel_search = 1;
     sf->iterative_sub_pixel = 1;
+#if CONFIG_LOSSLESS
+    sf->optimize_coefficients = 0;
+#else
     sf->optimize_coefficients = 1;
+#endif
     sf->no_skip_block4x4_search = 1;
 
     sf->first_step = 0;
@@ -1587,6 +1591,23 @@
     cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
     cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
 
+#if CONFIG_LOSSLESS
+    cpi->oxcf.lossless = oxcf->lossless;
+    if(cpi->oxcf.lossless)  /* lossless requested: swap in the Walsh-Hadamard forward and inverse transforms */
+    {
+      cpi->rtcd.fdct.short4x4                  = vp8_short_walsh4x4_x8_c;
+      cpi->rtcd.fdct.fast4x4                   = vp8_short_walsh4x4_x8_c;
+      cpi->rtcd.fdct.short8x4                  = vp8_short_walsh8x4_x8_c;
+      cpi->rtcd.fdct.fast8x4                   = vp8_short_walsh8x4_x8_c;
+      cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_lossless_c;
+      cpi->common.rtcd.idct.idct1        = vp8_short_inv_walsh4x4_1_x8_c;
+      cpi->common.rtcd.idct.idct16       = vp8_short_inv_walsh4x4_x8_c;
+      cpi->common.rtcd.idct.idct1_scalar_add  = vp8_dc_only_inv_walsh_add_c;
+      cpi->common.rtcd.idct.iwalsh1      = vp8_short_inv_walsh4x4_1_lossless_c;  /* must match the DC-only inverse the decoder installs at Q index 0 */
+      cpi->common.rtcd.idct.iwalsh16     = vp8_short_inv_walsh4x4_lossless_c;
+    }
+#endif
+
     cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
 
     cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
@@ -2984,6 +3005,12 @@
     {
         cm->filter_level = 0;
     }
+#if CONFIG_LOSSLESS
+    else if(cpi->oxcf.lossless)
+    {
+        cm->filter_level = 0;
+    }
+#endif
     else
     {
         struct vpx_usec_timer timer;
@@ -3139,7 +3166,11 @@
     // For 2 Pass Only used where GF/ARF prediction quality
     // is above a threshold
     cpi->zbin_mode_boost = 0;
+#if CONFIG_LOSSLESS
+    cpi->zbin_mode_boost_enabled = FALSE;
+#else
     cpi->zbin_mode_boost_enabled = TRUE;
+#endif
     if ( cpi->gfu_boost <= 400 )
     {
         cpi->zbin_mode_boost_enabled = FALSE;
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -313,6 +313,17 @@
     {
         int qzbin_factor = (vp8_dc_quant(Q,0) < 148) ? 84 : 80;
 
+#if CONFIG_LOSSLESS
+        if(cpi->oxcf.lossless)
+        {
+            if (Q==0)
+            {
+                qzbin_factor = 64;
+                qrounding_factor = 64;
+            }
+        }
+#endif
+
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
         invert_quant(cpi->Y1quant[Q] + 0,
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -264,6 +264,10 @@
 
     cpi->common.txfm_mode = ALLOW_8X8;
 
+#if CONFIG_LOSSLESS
+    if(cpi->oxcf.lossless)
+        cpi->common.txfm_mode = ONLY_4X4;
+#endif
     //cpi->common.filter_level = 0;      // Reset every key frame.
     cpi->common.filter_level = cpi->common.base_qindex * 3 / 8 ;
 
@@ -291,6 +295,11 @@
 {
 
     cpi->common.txfm_mode = ALLOW_8X8;
+
+#if CONFIG_LOSSLESS
+    if(cpi->oxcf.lossless)
+        cpi->common.txfm_mode = ONLY_4X4;
+#endif
 
     if(cpi->common.refresh_alt_ref_frame)
     {
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -317,6 +317,10 @@
 
     oxcf->tuning = vp8_cfg.tuning;
 
+#if CONFIG_LOSSLESS
+    oxcf->lossless = cfg.lossless;
+#endif
+
     /*
         printf("Current VP8 Settings: \n");
         printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -592,6 +592,12 @@
          */
         unsigned int           kf_max_dist;
 
+
+        /*!\brief Enable lossless compression mode
+         *
+         * If this flag is set, the encoder will be in lossless compression mode.
+         */
+        unsigned int           lossless;
     } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
 
 
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -966,6 +966,10 @@
         "Show quantizer histogram (n-buckets)");
 static const arg_def_t rate_hist_n         = ARG_DEF(NULL, "rate-hist", 1,
         "Show rate histogram (n-buckets)");
+#if CONFIG_LOSSLESS
+static const arg_def_t lossless_enabled = ARG_DEF(NULL, "lossless", 0,
+                                   "Enable lossless compression");
+#endif
 static const arg_def_t *main_args[] =
 {
     &debugmode,
@@ -973,6 +977,9 @@
     &deadline,
     &best_dl, &good_dl, &rt_dl,
     &verbosearg, &psnrarg, &recontest, &use_ivf, &q_hist_n, &rate_hist_n,
+#if CONFIG_LOSSLESS
+    &lossless_enabled,
+#endif
     NULL
 };
 
@@ -1670,6 +1677,10 @@
     cfg.g_w = 0;
     cfg.g_h = 0;
 
+#if CONFIG_LOSSLESS
+    cfg.lossless = 0;
+#endif
+
     /* Now parse the remainder of the parameters. */
     for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
     {
@@ -1753,10 +1764,22 @@
             cfg.kf_max_dist = arg_parse_uint(&arg);
         else if (arg_match(&arg, &kf_disabled, argi))
             cfg.kf_mode = VPX_KF_DISABLED;
+#if CONFIG_LOSSLESS
+        else if (arg_match(&arg, &lossless_enabled, argi))
+            cfg.lossless = 1;
+#endif
         else
             argj++;
     }
 
+#if CONFIG_LOSSLESS
+        if (cfg.lossless)
+        {
+            cfg.rc_min_quantizer = 0;
+            cfg.rc_max_quantizer = 0;
+        }
+#endif
+
     /* Handle codec specific options */
 #if CONFIG_VP8_ENCODER
 
@@ -1929,6 +1952,9 @@
             SHOW(kf_mode);
             SHOW(kf_min_dist);
             SHOW(kf_max_dist);
+#if CONFIG_LOSSLESS
+            SHOW(lossless);
+#endif
         }
 
         if(pass == (one_pass_only ? one_pass_only - 1 : 0)) {
--