shithub: libvpx

--- a/vp8/encoder/onyx_if.c

+++ b/vp8/encoder/onyx_if.c

@@ -21,7 +21,7 @@

 #include "vp8/common/alloccommon.h"

 #include "mcomp.h"

 #include "firstpass.h"

-#include "vpx/internal/vpx_psnr.h"

+#include "vpx_dsp/psnr.h"

 #include "vpx_scale/vpx_scale.h"

 #include "vp8/common/extend.h"

 #include "ratectrl.h"

@@ -2023,14 +2023,6 @@

         cpi->summed_weights = 0;

-    if (cpi->b_calculate_ssimg)

-    {

-        cpi->total_ssimg_y = 0;

-        cpi->total_ssimg_u = 0;

-        cpi->total_ssimg_v = 0;

-        cpi->total_ssimg_all = 0;

-    }

 #endif

     cpi->first_time_stamp_ever = 0x7FFFFFFF;

@@ -2313,45 +2305,6 @@

                                rate_err, fabs(rate_err));

-            if (cpi->b_calculate_ssimg)

-            {

-                if (cpi->oxcf.number_of_layers > 1)

-                {

-                    int i;

-                    fprintf(f, "Layer\tBitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"

-                               "Time(us)\n");

-                    for (i=0; i<(int)cpi->oxcf.number_of_layers; i++)

-                    {

-                        double dr = (double)cpi->bytes_in_layer[i] *

-                                    8.0 / 1000.0  / time_encoded;

-                        fprintf(f, "%5d\t%7.3f\t%6.4f\t"

-                                "%6.4f\t%6.4f\t%6.4f\t%8.0f\n",

-                                i, dr,

-                                cpi->total_ssimg_y_in_layer[i] /

-                                     cpi->frames_in_layer[i],

-                                cpi->total_ssimg_u_in_layer[i] /

-                                     cpi->frames_in_layer[i],

-                                cpi->total_ssimg_v_in_layer[i] /

-                                     cpi->frames_in_layer[i],

-                                cpi->total_ssimg_all_in_layer[i] /

-                                     cpi->frames_in_layer[i],

-                                total_encode_time);

-                    }

-                }

-                else

-                {

-                    fprintf(f, "BitRate\tSSIM_Y\tSSIM_U\tSSIM_V\tSSIM_A\t"

-                               "Time(us)\n");

-                    fprintf(f, "%7.3f\t%6.4f\t%6.4f\t%6.4f\t%6.4f\t%8.0f\n", dr,

-                            cpi->total_ssimg_y / cpi->count,

-                            cpi->total_ssimg_u / cpi->count,

-                            cpi->total_ssimg_v / cpi->count,

-                            cpi->total_ssimg_all / cpi->count, total_encode_time);

-                }

-            }

             fclose(f);

 #if 0

             f = fopen("qskip.stt", "a");

@@ -5746,38 +5699,6 @@

 #endif

-            if (cpi->b_calculate_ssimg)

-            {

-                double y, u, v, frame_all;

-                frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show,

-                    &y, &u, &v);

-                if (cpi->oxcf.number_of_layers > 1)

-                {

-                    unsigned int i;

-                    for (i=cpi->current_layer;

-                         i<cpi->oxcf.number_of_layers; i++)

-                    {

-                        if (!cpi->b_calculate_psnr)

-                            cpi->frames_in_layer[i]++;

-                        cpi->total_ssimg_y_in_layer[i] += y;

-                        cpi->total_ssimg_u_in_layer[i] += u;

-                        cpi->total_ssimg_v_in_layer[i] += v;

-                        cpi->total_ssimg_all_in_layer[i] += frame_all;

-                    }

-                }

-                else

-                {

-                    cpi->total_ssimg_y += y;

-                    cpi->total_ssimg_u += u;

-                    cpi->total_ssimg_v += v;

-                    cpi->total_ssimg_all += frame_all;

-                }

-            }

--- a/vp8/encoder/onyx_int.h

+++ b/vp8/encoder/onyx_int.h

@@ -636,12 +636,6 @@

     double summed_weights;

     unsigned int tot_recode_hits;

-    double total_ssimg_y;

-    double total_ssimg_u;

-    double total_ssimg_v;

-    double total_ssimg_all;

     int b_calculate_ssimg;

 #endif

     int b_calculate_psnr;

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -16,7 +16,7 @@

 #include "./vpx_config.h"

 #include "./vpx_dsp_rtcd.h"

 #include "./vpx_scale_rtcd.h"

-#include "vpx/internal/vpx_psnr.h"

+#include "vpx_dsp/psnr.h"

 #include "vpx_dsp/vpx_dsp_common.h"

 #include "vpx_dsp/vpx_filter.h"

 #if CONFIG_INTERNAL_STATS

@@ -1804,7 +1804,6 @@

   init_level_info(&cpi->level_info);

 #if CONFIG_INTERNAL_STATS

-  cpi->b_calculate_ssimg = 0;

   cpi->b_calculate_blockiness = 1;

   cpi->b_calculate_consistency = 1;

   cpi->total_inconsistency = 0;

@@ -1828,9 +1827,6 @@

     cpi->summedp_weights = 0;

-  if (cpi->b_calculate_ssimg) {

-    cpi->ssimg.worst= 100.0;

-  }

   cpi->fastssim.worst = 100.0;

   cpi->psnrhvs.worst = 100.0;

@@ -2126,13 +2122,6 @@

           SNPRINT2(results, "\t%7.3f", consistency);

           SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);

-        if (cpi->b_calculate_ssimg) {

-          SNPRINT(headings, "\t  SSIMG\tWtSSIMG");

-          SNPRINT2(results, "\t%7.3f", cpi->ssimg.stat[ALL] / cpi->count);

-          SNPRINT2(results, "\t%7.3f", cpi->ssimg.worst);

-        }

         fprintf(f, "%s\t    Time  Rc-Err Abs Err\n", headings);

         fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,

                 total_encode_time, rate_err, fabs(rate_err));

@@ -2226,271 +2215,15 @@

 #endif

-/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()

- * and highbd_8_variance(). It should not.

- */

-static void encoder_variance(const uint8_t *a, int  a_stride,

-                             const uint8_t *b, int  b_stride,

-                             int  w, int  h, unsigned int *sse, int *sum) {

-  int i, j;

-  *sum = 0;

-  *sse = 0;

-  for (i = 0; i < h; i++) {

-    for (j = 0; j < w; j++) {

-      const int diff = a[j] - b[j];

-      *sum += diff;

-      *sse += diff * diff;

-    }

-    a += a_stride;

-    b += b_stride;

-  }

-}

-#if CONFIG_VP9_HIGHBITDEPTH

-static void encoder_highbd_variance64(const uint8_t *a8, int  a_stride,

-                                      const uint8_t *b8, int  b_stride,

-                                      int w, int h, uint64_t *sse,

-                                      int64_t *sum) {

-  int i, j;

-  uint16_t *a = CONVERT_TO_SHORTPTR(a8);

-  uint16_t *b = CONVERT_TO_SHORTPTR(b8);

-  *sum = 0;

-  *sse = 0;

-  for (i = 0; i < h; i++) {

-    for (j = 0; j < w; j++) {

-      const int diff = a[j] - b[j];

-      *sum += diff;

-      *sse += diff * diff;

-    }

-    a += a_stride;

-    b += b_stride;

-  }

-}

-static void encoder_highbd_8_variance(const uint8_t *a8, int  a_stride,

-                                      const uint8_t *b8, int  b_stride,

-                                      int w, int h,

-                                      unsigned int *sse, int *sum) {

-  uint64_t sse_long = 0;

-  int64_t sum_long = 0;

-  encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,

-                            &sse_long, &sum_long);

-  *sse = (unsigned int)sse_long;

-  *sum = (int)sum_long;

-}

-#endif  // CONFIG_VP9_HIGHBITDEPTH

-static int64_t get_sse(const uint8_t *a, int a_stride,

-                       const uint8_t *b, int b_stride,

-                       int width, int height) {

-  const int dw = width % 16;

-  const int dh = height % 16;

-  int64_t total_sse = 0;

-  unsigned int sse = 0;

-  int sum = 0;

-  int x, y;

-  if (dw > 0) {

-    encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,

-                     dw, height, &sse, &sum);

-    total_sse += sse;

-  }

-  if (dh > 0) {

-    encoder_variance(&a[(height - dh) * a_stride], a_stride,

-                     &b[(height - dh) * b_stride], b_stride,

-                     width - dw, dh, &sse, &sum);

-    total_sse += sse;

-  }

-  for (y = 0; y < height / 16; ++y) {

-    const uint8_t *pa = a;

-    const uint8_t *pb = b;

-    for (x = 0; x < width / 16; ++x) {

-      vpx_mse16x16(pa, a_stride, pb, b_stride, &sse);

-      total_sse += sse;

-      pa += 16;

-      pb += 16;

-    }

-    a += 16 * a_stride;

-    b += 16 * b_stride;

-  }

-  return total_sse;

-}

-#if CONFIG_VP9_HIGHBITDEPTH

-static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,

-                                    const uint8_t *b8, int b_stride,

-                                    int width, int height,

-                                    unsigned int input_shift) {

-  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);

-  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

-  int64_t total_sse = 0;

-  int x, y;

-  for (y = 0; y < height; ++y) {

-    for (x = 0; x < width; ++x) {

-      int64_t diff;

-      diff = (a[x] >> input_shift) - (b[x] >> input_shift);

-      total_sse += diff * diff;

-    }

-    a += a_stride;

-    b += b_stride;

-  }

-  return total_sse;

-}

-static int64_t highbd_get_sse(const uint8_t *a, int a_stride,

-                              const uint8_t *b, int b_stride,

-                              int width, int height) {

-  int64_t total_sse = 0;

-  int x, y;

-  const int dw = width % 16;

-  const int dh = height % 16;

-  unsigned int sse = 0;

-  int sum = 0;

-  if (dw > 0) {

-    encoder_highbd_8_variance(&a[width - dw], a_stride,

-                              &b[width - dw], b_stride,

-                              dw, height, &sse, &sum);

-    total_sse += sse;

-  }

-  if (dh > 0) {

-    encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,

-                              &b[(height - dh) * b_stride], b_stride,

-                              width - dw, dh, &sse, &sum);

-    total_sse += sse;

-  }

-  for (y = 0; y < height / 16; ++y) {

-    const uint8_t *pa = a;

-    const uint8_t *pb = b;

-    for (x = 0; x < width / 16; ++x) {

-      vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);

-      total_sse += sse;

-      pa += 16;

-      pb += 16;

-    }

-    a += 16 * a_stride;

-    b += 16 * b_stride;

-  }

-  return total_sse;

-}

-#endif  // CONFIG_VP9_HIGHBITDEPTH

-typedef struct {

-  double psnr[4];       // total/y/u/v

-  uint64_t sse[4];      // total/y/u/v

-  uint32_t samples[4];  // total/y/u/v

-} PSNR_STATS;

-#if CONFIG_VP9_HIGHBITDEPTH

-static void calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,

-                             const YV12_BUFFER_CONFIG *b,

-                             PSNR_STATS *psnr,

-                             unsigned int bit_depth,

-                             unsigned int in_bit_depth) {

-  const int widths[3] =

-      {a->y_crop_width,  a->uv_crop_width,  a->uv_crop_width };

-  const int heights[3] =

-      {a->y_crop_height, a->uv_crop_height, a->uv_crop_height};

-  const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer,  a->v_buffer };

-  const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride};

-  const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer,  b->v_buffer };

-  const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride};

-  int i;

-  uint64_t total_sse = 0;

-  uint32_t total_samples = 0;

-  const double peak = (double)((1 << in_bit_depth) - 1);

-  const unsigned int input_shift = bit_depth - in_bit_depth;

-  for (i = 0; i < 3; ++i) {

-    const int w = widths[i];

-    const int h = heights[i];

-    const uint32_t samples = w * h;

-    uint64_t sse;

-    if (a->flags & YV12_FLAG_HIGHBITDEPTH) {

-      if (input_shift) {

-        sse = highbd_get_sse_shift(a_planes[i], a_strides[i],

-                                   b_planes[i], b_strides[i], w, h,

-                                   input_shift);

-      } else {

-        sse = highbd_get_sse(a_planes[i], a_strides[i],

-                             b_planes[i], b_strides[i], w, h);

-      }

-    } else {

-      sse = get_sse(a_planes[i], a_strides[i],

-                    b_planes[i], b_strides[i],

-                    w, h);

-    }

-    psnr->sse[1 + i] = sse;

-    psnr->samples[1 + i] = samples;

-    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);

-    total_sse += sse;

-    total_samples += samples;

-  }

-  psnr->sse[0] = total_sse;

-  psnr->samples[0] = total_samples;

-  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,

-                                  (double)total_sse);

-}

-#else  // !CONFIG_VP9_HIGHBITDEPTH

-static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,

-                      PSNR_STATS *psnr) {

-  static const double peak = 255.0;

-  const int widths[3]        = {

-      a->y_crop_width, a->uv_crop_width, a->uv_crop_width};

-  const int heights[3]       = {

-      a->y_crop_height, a->uv_crop_height, a->uv_crop_height};

-  const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer};

-  const int a_strides[3]     = {a->y_stride, a->uv_stride, a->uv_stride};

-  const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer};

-  const int b_strides[3]     = {b->y_stride, b->uv_stride, b->uv_stride};

-  int i;

-  uint64_t total_sse = 0;

-  uint32_t total_samples = 0;

-  for (i = 0; i < 3; ++i) {

-    const int w = widths[i];

-    const int h = heights[i];

-    const uint32_t samples = w * h;

-    const uint64_t sse = get_sse(a_planes[i], a_strides[i],

-                                 b_planes[i], b_strides[i],

-                                 w, h);

-    psnr->sse[1 + i] = sse;

-    psnr->samples[1 + i] = samples;

-    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);

-    total_sse += sse;

-    total_samples += samples;

-  }

-  psnr->sse[0] = total_sse;

-  psnr->samples[0] = total_samples;

-  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,

-                                  (double)total_sse);

-}

-#endif  // CONFIG_VP9_HIGHBITDEPTH

 static void generate_psnr_packet(VP9_COMP *cpi) {

   struct vpx_codec_cx_pkt pkt;

   int i;

   PSNR_STATS psnr;

 #if CONFIG_VP9_HIGHBITDEPTH

-  calc_highbd_psnr(cpi->Source, cpi->common.frame_to_show, &psnr,

-                   cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);

+  vpx_calc_highbd_psnr(cpi->Source, cpi->common.frame_to_show, &psnr,

+                       cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);

 #else

-  calc_psnr(cpi->Source, cpi->common.frame_to_show, &psnr);

+  vpx_calc_psnr(cpi->Source, cpi->common.frame_to_show, &psnr);

 #endif

   for (i = 0; i < 4; ++i) {

@@ -3160,12 +2893,12 @@

 #if CONFIG_VP9_HIGHBITDEPTH

   if (cm->use_highbitdepth) {

-    recon_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+    recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

   } else {

-    recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+    recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

 #else

-  recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+  recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -3721,12 +3454,12 @@

 #if CONFIG_VP9_HIGHBITDEPTH

         if (cm->use_highbitdepth) {

-          kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+          kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

         } else {

-          kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+          kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

 #else

-        kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+        kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

         // Prevent possible divide by zero error below for perfect KF

@@ -4142,13 +3875,13 @@

   if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {

 #if CONFIG_VP9_HIGHBITDEPTH

     if (cm->use_highbitdepth) {

-      cpi->ambient_err = vp9_highbd_get_y_sse(cpi->Source,

+      cpi->ambient_err = vpx_highbd_get_y_sse(cpi->Source,

                                               get_frame_new_buffer(cm));

     } else {

-      cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+      cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

 #else

-    cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

+    cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -4872,7 +4605,15 @@

     cpi->bytes += (int)(*size);

     if (cm->show_frame) {

+      uint32_t bit_depth = 8;

+      uint32_t in_bit_depth = 8;

       cpi->count++;

+#if CONFIG_VP9_HIGHBITDEPTH

+    if (cm->use_highbitdepth) {

+      in_bit_depth = cpi->oxcf.input_bit_depth;

+      bit_depth = cm->bit_depth;

+    }

+#endif

       if (cpi->b_calculate_psnr) {

         YV12_BUFFER_CONFIG *orig = cpi->Source;

@@ -4880,10 +4621,10 @@

         YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer;

         PSNR_STATS psnr;

 #if CONFIG_VP9_HIGHBITDEPTH

-        calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,

-                         cpi->oxcf.input_bit_depth);

+        vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd,

+                             in_bit_depth);

 #else

-        calc_psnr(orig, recon, &psnr);

+        vpx_calc_psnr(orig, recon, &psnr);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

         adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3],

@@ -4896,7 +4637,7 @@

           PSNR_STATS psnr2;

           double frame_ssim2 = 0, weight = 0;

 #if CONFIG_VP9_POSTPROC

-          if (vpx_alloc_frame_buffer(&cm->post_proc_buffer,

+          if (vpx_alloc_frame_buffer(pp,

                                      recon->y_crop_width, recon->y_crop_height,

                                      cm->subsampling_x, cm->subsampling_y,

 #if CONFIG_VP9_HIGHBITDEPTH

@@ -4908,16 +4649,16 @@

                                "Failed to allocate post processing buffer");

-          vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer,

+          vp9_deblock(cm->frame_to_show, pp,

                       cm->lf.filter_level * 10 / 6);

 #endif

           vpx_clear_system_state();

 #if CONFIG_VP9_HIGHBITDEPTH

-          calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,

+          vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,

                            cpi->oxcf.input_bit_depth);

 #else

-          calc_psnr(orig, pp, &psnr2);

+          vpx_calc_psnr(orig, pp, &psnr2);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

           cpi->totalp_sq_error += psnr2.sse[0];

@@ -4928,7 +4669,7 @@

 #if CONFIG_VP9_HIGHBITDEPTH

           if (cm->use_highbitdepth) {

             frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight,

-                                               (int)cm->bit_depth);

+                                               bit_depth, in_bit_depth);

           } else {

             frame_ssim2 = vpx_calc_ssim(orig, recon, &weight);

@@ -4943,12 +4684,12 @@

 #if CONFIG_VP9_HIGHBITDEPTH

           if (cm->use_highbitdepth) {

             frame_ssim2 = vpx_highbd_calc_ssim(

-                orig, &cm->post_proc_buffer, &weight, (int)cm->bit_depth);

+                orig, pp, &weight, bit_depth, in_bit_depth);

           } else {

-            frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);

+            frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);

 #else

-          frame_ssim2 = vpx_calc_ssim(orig, &cm->post_proc_buffer, &weight);

+          frame_ssim2 = vpx_calc_ssim(orig, pp, &weight);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

           cpi->summedp_quality += frame_ssim2 * weight;

@@ -5000,37 +4741,16 @@

-      if (cpi->b_calculate_ssimg) {

-        double y, u, v, frame_all;

-#if CONFIG_VP9_HIGHBITDEPTH

-        if (cm->use_highbitdepth) {

-          frame_all = vpx_highbd_calc_ssimg(cpi->Source, cm->frame_to_show, &y,

-                                            &u, &v, (int)cm->bit_depth);

-        } else {

-          frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u,

-                                     &v);

-        }

-#else

-        frame_all = vpx_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);

-#endif  // CONFIG_VP9_HIGHBITDEPTH

-        adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);

-      }

-#if CONFIG_VP9_HIGHBITDEPTH

-      if (!cm->use_highbitdepth)

-#endif

         double y, u, v, frame_all;

         frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,

-                                      &v);

+                                      &v, bit_depth, in_bit_depth);

         adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);

-        /* TODO(JBB): add 10/12 bit support */

-#if CONFIG_VP9_HIGHBITDEPTH

-      if (!cm->use_highbitdepth)

-#endif

         double y, u, v, frame_all;

-        frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);

+        frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v,

+                                bit_depth, in_bit_depth);

         adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);

@@ -5157,28 +4877,6 @@

   cpi->use_svc = use_svc;

   return;

-int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a,

-                      const YV12_BUFFER_CONFIG *b) {

-  assert(a->y_crop_width == b->y_crop_width);

-  assert(a->y_crop_height == b->y_crop_height);

-  return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,

-                 a->y_crop_width, a->y_crop_height);

-}

-#if CONFIG_VP9_HIGHBITDEPTH

-int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,

-                             const YV12_BUFFER_CONFIG *b) {

-  assert(a->y_crop_width == b->y_crop_width);

-  assert(a->y_crop_height == b->y_crop_height);

-  assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);

-  assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);

-  return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,

-                        a->y_crop_width, a->y_crop_height);

-}

-#endif  // CONFIG_VP9_HIGHBITDEPTH

 int vp9_get_quantizer(VP9_COMP *cpi) {

   return cpi->common.base_qindex;

--- a/vp9/encoder/vp9_picklpf.c

+++ b/vp9/encoder/vp9_picklpf.c

@@ -12,7 +12,7 @@

 #include <limits.h>

 #include "./vpx_scale_rtcd.h"

+#include "vpx_dsp/psnr.h"

 #include "vpx_mem/vpx_mem.h"

 #include "vpx_ports/mem.h"

@@ -52,12 +52,12 @@

 #if CONFIG_VP9_HIGHBITDEPTH

   if (cm->use_highbitdepth) {

-    filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show);

+    filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);

   } else {

-    filt_err = vp9_get_y_sse(sd, cm->frame_to_show);

+    filt_err = vpx_get_y_sse(sd, cm->frame_to_show);

 #else

-  filt_err = vp9_get_y_sse(sd, cm->frame_to_show);

+  filt_err = vpx_get_y_sse(sd, cm->frame_to_show);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

   // Re-instate the unfiltered frame

--- a/vpx/internal/vpx_psnr.h

+++ /dev/null

@@ -1,34 +1,0 @@

-/*

- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#ifndef VPX_INTERNAL_VPX_PSNR_H_

-#define VPX_INTERNAL_VPX_PSNR_H_

-#ifdef __cplusplus

-extern "C" {

-#endif

-// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t

-/*!\brief Converts SSE to PSNR

- *

- * Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR).

- *

- * \param[in]    samples       Number of samples

- * \param[in]    peak          Max sample value

- * \param[in]    sse           Sum of squared errors

- */

-double vpx_sse_to_psnr(double samples, double peak, double sse);

-#ifdef __cplusplus

-}  // extern "C"

-#endif

-#endif  // VPX_INTERNAL_VPX_PSNR_H_

--- a/vpx/src/vpx_psnr.c

+++ /dev/null

@@ -1,24 +1,0 @@

-/*

- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.

- *

- *  Use of this source code is governed by a BSD-style license

- *  that can be found in the LICENSE file in the root of the source

- *  tree. An additional intellectual property rights grant can be found

- *  in the file PATENTS.  All contributing project authors may

- *  be found in the AUTHORS file in the root of the source tree.

- */

-#include <math.h>

-#include "vpx/internal/vpx_psnr.h"

-#define MAX_PSNR 100.0

-double vpx_sse_to_psnr(double samples, double peak, double sse) {

-  if (sse > 0.0) {

-    const double psnr = 10.0 * log10(samples * peak * peak / sse);

-    return psnr > MAX_PSNR ? MAX_PSNR : psnr;

-  } else {

-    return MAX_PSNR;

-  }

-}

--- a/vpx/vpx_codec.mk

+++ b/vpx/vpx_codec.mk

@@ -36,10 +36,8 @@

 API_SRCS-yes += src/vpx_encoder.c

 API_SRCS-yes += vpx_encoder.h

 API_SRCS-yes += internal/vpx_codec_internal.h

-API_SRCS-yes += internal/vpx_psnr.h

 API_SRCS-yes += src/vpx_codec.c

 API_SRCS-yes += src/vpx_image.c

-API_SRCS-yes += src/vpx_psnr.c

 API_SRCS-yes += vpx_codec.h

 API_SRCS-yes += vpx_codec.mk

 API_SRCS-yes += vpx_frame_buffer.h

--- a/vpx_dsp/fastssim.c

+++ b/vpx_dsp/fastssim.c

@@ -10,6 +10,7 @@

  *  This code was originally written by: Nathan E. Egge, at the Daala

  *  project.

*/

+#include <assert.h>

 #include <math.h>

 #include <stdlib.h>

 #include <string.h>

@@ -17,19 +18,24 @@

 #include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/ssim.h"

 #include "vpx_ports/system_state.h"

-/* TODO(jbb): High bit depth version of this code needed */

 typedef struct fs_level fs_level;

 typedef struct fs_ctx fs_ctx;

 #define SSIM_C1 (255 * 255 * 0.01 * 0.01)

 #define SSIM_C2 (255 * 255 * 0.03 * 0.03)

+#if CONFIG_VP9_HIGHBITDEPTH

+#define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01)

+#define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01)

+#define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03)

+#define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03)

+#endif

 #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b))

 #define FS_MAXI(_a, _b) ((_a) > (_b) ? (_a) : (_b))

 struct fs_level {

-  uint16_t *im1;

-  uint16_t *im2;

+  uint32_t *im1;

+  uint32_t *im2;

   double *ssim;

   int w;

   int h;

@@ -80,7 +86,7 @@

     level_size += sizeof(*_ctx->level[l].ssim) - 1;

     level_size /= sizeof(*_ctx->level[l].ssim);

     level_size *= sizeof(*_ctx->level[l].ssim);

-    _ctx->level[l].im1 = (uint16_t *) data;

+    _ctx->level[l].im1 = (uint32_t *)data;

     _ctx->level[l].im2 = _ctx->level[l].im1 + im_size;

     data += level_size;

     _ctx->level[l].ssim = (double *) data;

@@ -96,10 +102,10 @@

 static void fs_downsample_level(fs_ctx *_ctx, int _l) {

-  const uint16_t *src1;

-  const uint16_t *src2;

-  uint16_t *dst1;

-  uint16_t *dst2;

+  const uint32_t *src1;

+  const uint32_t *src2;

+  uint32_t *dst1;

+  uint32_t *dst2;

   int w2;

   int h2;

   int w;

@@ -132,11 +138,12 @@

-static void fs_downsample_level0(fs_ctx *_ctx, const unsigned char *_src1,

-                                 int _s1ystride, const unsigned char *_src2,

-                                 int _s2ystride, int _w, int _h) {

-  uint16_t *dst1;

-  uint16_t *dst2;

+static void fs_downsample_level0(fs_ctx *_ctx, const uint8_t *_src1,

+                                 int _s1ystride, const uint8_t *_src2,

+                                 int _s2ystride, int _w, int _h,

+                                 uint32_t bd, uint32_t shift) {

+  uint32_t *dst1;

+  uint32_t *dst2;

   int w;

   int h;

   int i;

@@ -155,21 +162,34 @@

       int i1;

       i0 = 2 * i;

       i1 = FS_MINI(i0 + 1, _w);

-      dst1[j * w + i] = _src1[j0 * _s1ystride + i0]

-          + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0]

-          + _src1[j1 * _s1ystride + i1];

-      dst2[j * w + i] = _src2[j0 * _s2ystride + i0]

-          + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0]

-          + _src2[j1 * _s2ystride + i1];

+      if (bd == 8 && shift == 0) {

+        dst1[j * w + i] = _src1[j0 * _s1ystride + i0]

+            + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0]

+            + _src1[j1 * _s1ystride + i1];

+        dst2[j * w + i] = _src2[j0 * _s2ystride + i0]

+            + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0]

+            + _src2[j1 * _s2ystride + i1];

+      } else {

+        uint16_t * src1s = CONVERT_TO_SHORTPTR(_src1);

+        uint16_t * src2s = CONVERT_TO_SHORTPTR(_src2);

+        dst1[j * w + i] = (src1s[j0 * _s1ystride + i0] >> shift)

+              + (src1s[j0 * _s1ystride + i1] >> shift)

+              + (src1s[j1 * _s1ystride + i0] >> shift)

+              + (src1s[j1 * _s1ystride + i1] >> shift);

+        dst2[j * w + i] = (src2s[j0 * _s2ystride + i0] >> shift)

+              + (src2s[j0 * _s2ystride + i1] >> shift)

+              + (src2s[j1 * _s2ystride + i0] >> shift)

+              + (src2s[j1 * _s2ystride + i1] >> shift);

+      }

-static void fs_apply_luminance(fs_ctx *_ctx, int _l) {

+static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) {

   unsigned *col_sums_x;

   unsigned *col_sums_y;

-  uint16_t *im1;

-  uint16_t *im2;

+  uint32_t *im1;

+  uint32_t *im2;

   double *ssim;

   double c1;

   int w;

@@ -178,6 +198,15 @@

   int j1offs;

   int i;

   int j;

+  double ssim_c1 = SSIM_C1;

+#if CONFIG_VP9_HIGHBITDEPTH

+  if (bit_depth == 10)

+    ssim_c1 = SSIM_C1_10;

+  if (bit_depth == 12)

+    ssim_c1 = SSIM_C1_12;

+#else

+  assert(bit_depth == 8);

+#endif

   w = _ctx->level[_l].w;

   h = _ctx->level[_l].h;

   col_sums_x = _ctx->col_buf;

@@ -196,7 +225,7 @@

       col_sums_y[i] += im2[j1offs + i];

   ssim = _ctx->level[_l].ssim;

-  c1 = (double) (SSIM_C1 * 4096 * (1 << 4 * _l));

+  c1 = (double) (ssim_c1 * 4096 * (1 << 4 * _l));

   for (j = 0; j < h; j++) {

     unsigned mux;

     unsigned muy;

@@ -294,9 +323,9 @@

} \

   while (0)

-static void fs_calc_structure(fs_ctx *_ctx, int _l) {

-  uint16_t *im1;

-  uint16_t *im2;

+static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) {

+  uint32_t *im1;

+  uint32_t *im2;

   unsigned *gx_buf;

   unsigned *gy_buf;

   double *ssim;

@@ -309,6 +338,16 @@

   int h;

   int i;

   int j;

+  double ssim_c2 = SSIM_C2;

+#if CONFIG_VP9_HIGHBITDEPTH

+  if (bit_depth == 10)

+    ssim_c2 = SSIM_C2_10;

+  if (bit_depth == 12)

+    ssim_c2 = SSIM_C2_12;

+#else

+  assert(bit_depth == 8);

+#endif

   w = _ctx->level[_l].w;

   h = _ctx->level[_l].h;

   im1 = _ctx->level[_l].im1;

@@ -318,7 +357,7 @@

   stride = w + 8;

   gy_buf = gx_buf + 8 * stride;

   memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf));

-  c2 = SSIM_C2 * (1 << 4 * _l) * 16 * 104;

+  c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104;

   for (j = 0; j < h + 4; j++) {

     if (j < h - 1) {

       for (i = 0; i < w - 1; i++) {

@@ -326,11 +365,11 @@

         unsigned g2;

         unsigned gx;

         unsigned gy;

-        g1 = abs(im1[(j + 1) * w + i + 1] - im1[j * w + i]);

-        g2 = abs(im1[(j + 1) * w + i] - im1[j * w + i + 1]);

+        g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]);

+        g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]);

         gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);

-        g1 = abs(im2[(j + 1) * w + i + 1] - im2[j * w + i]);

-        g2 = abs(im2[(j + 1) * w + i] - im2[j * w + i + 1]);

+        g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]);

+        g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]);

         gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2);

         gx_buf[(j & 7) * stride + i + 4] = gx;

         gy_buf[(j & 7) * stride + i + 4] = gy;

@@ -421,48 +460,55 @@

   return pow(ret / (w * h), FS_WEIGHTS[_l]);

-static double calc_ssim(const unsigned char *_src, int _systride,

-                 const unsigned char *_dst, int _dystride, int _w, int _h) {

+static double convert_ssim_db(double _ssim, double _weight) {

+  assert(_weight >= _ssim);

+  if ((_weight - _ssim) < 1e-10)

+    return MAX_SSIM_DB;

+  return 10 * (log10(_weight) - log10(_weight - _ssim));

+}

+static double calc_ssim(const uint8_t *_src, int _systride,

+                        const uint8_t *_dst, int _dystride,

+                        int _w, int _h, uint32_t _bd, uint32_t _shift) {

   fs_ctx ctx;

   double ret;

   int l;

   ret = 1;

   fs_ctx_init(&ctx, _w, _h, FS_NLEVELS);

-  fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h);

+  fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride,

+                       _w, _h, _bd, _shift);

   for (l = 0; l < FS_NLEVELS - 1; l++) {

-    fs_calc_structure(&ctx, l);

+    fs_calc_structure(&ctx, l, _bd);

     ret *= fs_average(&ctx, l);

     fs_downsample_level(&ctx, l + 1);

-  fs_calc_structure(&ctx, l);

-  fs_apply_luminance(&ctx, l);

+  fs_calc_structure(&ctx, l, _bd);

+  fs_apply_luminance(&ctx, l, _bd);

   ret *= fs_average(&ctx, l);

   fs_ctx_clear(&ctx);

   return ret;

-static double convert_ssim_db(double _ssim, double _weight) {

-  return 10 * (log10(_weight) - log10(_weight - _ssim));

-}

 double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,

                          const YV12_BUFFER_CONFIG *dest,

-                         double *ssim_y, double *ssim_u, double *ssim_v) {

+                         double *ssim_y, double *ssim_u, double *ssim_v,

+                         uint32_t bd, uint32_t in_bd) {

   double ssimv;

+  uint32_t bd_shift = 0;

   vpx_clear_system_state();

+  assert(bd >= in_bd);

+  bd_shift = bd - in_bd;

   *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer,

                       dest->y_stride, source->y_crop_width,

-                      source->y_crop_height);

+                      source->y_crop_height, in_bd, bd_shift);

   *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer,

                       dest->uv_stride, source->uv_crop_width,

-                      source->uv_crop_height);

+                      source->uv_crop_height, in_bd, bd_shift);

   *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer,

                       dest->uv_stride, source->uv_crop_width,

-                      source->uv_crop_height);

-  ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));

+                      source->uv_crop_height, in_bd, bd_shift);

+  ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v));

   return convert_ssim_db(ssimv, 1.0);

--- /dev/null

+++ b/vpx_dsp/psnr.c

@@ -1,0 +1,296 @@

+/*

+*  Copyright (c) 2016 The WebM project authors. All Rights Reserved.

+*

+*  Use of this source code is governed by a BSD-style license

+*  that can be found in the LICENSE file in the root of the source

+*  tree. An additional intellectual property rights grant can be found

+*  in the file PATENTS.  All contributing project authors may

+*  be found in the AUTHORS file in the root of the source tree.

+*/

+#include <math.h>

+#include <assert.h>

+#include "./vpx_dsp_rtcd.h"

+#include "vpx_dsp/psnr.h"

+#include "vpx_scale/yv12config.h"

+double vpx_sse_to_psnr(double samples, double peak, double sse) {

+  if (sse > 0.0) {

+    const double psnr = 10.0 * log10(samples * peak * peak / sse);

+    return psnr > MAX_PSNR ? MAX_PSNR : psnr;

+  } else {

+    return MAX_PSNR;

+  }

+}

+/* TODO(yaowu): The block_variance calls the unoptimized versions of variance()

+* and highbd_8_variance(). It should not.

+*/

+static void encoder_variance(const uint8_t *a, int  a_stride,

+  const uint8_t *b, int  b_stride,

+  int  w, int  h, unsigned int *sse, int *sum) {

+  int i, j;

+  *sum = 0;

+  *sse = 0;

+  for (i = 0; i < h; i++) {

+    for (j = 0; j < w; j++) {

+      const int diff = a[j] - b[j];

+      *sum += diff;

+      *sse += diff * diff;

+    }

+    a += a_stride;

+    b += b_stride;

+  }

+}

+#if CONFIG_VP9_HIGHBITDEPTH

+static void encoder_highbd_variance64(const uint8_t *a8, int  a_stride,

+  const uint8_t *b8, int  b_stride,

+  int w, int h, uint64_t *sse,

+  uint64_t *sum) {

+  int i, j;

+  uint16_t *a = CONVERT_TO_SHORTPTR(a8);

+  uint16_t *b = CONVERT_TO_SHORTPTR(b8);

+  *sum = 0;

+  *sse = 0;

+  for (i = 0; i < h; i++) {

+    for (j = 0; j < w; j++) {

+      const int diff = a[j] - b[j];

+      *sum += diff;

+      *sse += diff * diff;

+    }

+    a += a_stride;

+    b += b_stride;

+  }

+}

+static void encoder_highbd_8_variance(const uint8_t *a8, int  a_stride,

+  const uint8_t *b8, int  b_stride,

+  int w, int h,

+  unsigned int *sse, int *sum) {

+  uint64_t sse_long = 0;

+  uint64_t sum_long = 0;

+  encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,

+    &sse_long, &sum_long);

+  *sse = (unsigned int)sse_long;

+  *sum = (int)sum_long;

+}

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+static int64_t get_sse(const uint8_t *a, int a_stride,

+  const uint8_t *b, int b_stride,

+  int width, int height) {

+  const int dw = width % 16;

+  const int dh = height % 16;

+  int64_t total_sse = 0;

+  unsigned int sse = 0;

+  int sum = 0;

+  int x, y;

+  if (dw > 0) {

+    encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,

+      dw, height, &sse, &sum);

+    total_sse += sse;

+  }

+  if (dh > 0) {

+    encoder_variance(&a[(height - dh) * a_stride], a_stride,

+      &b[(height - dh) * b_stride], b_stride,

+      width - dw, dh, &sse, &sum);

+    total_sse += sse;

+  }

+  for (y = 0; y < height / 16; ++y) {

+    const uint8_t *pa = a;

+    const uint8_t *pb = b;

+    for (x = 0; x < width / 16; ++x) {

+      vpx_mse16x16(pa, a_stride, pb, b_stride, &sse);

+      total_sse += sse;

+      pa += 16;

+      pb += 16;

+    }

+    a += 16 * a_stride;

+    b += 16 * b_stride;

+  }

+  return total_sse;

+}

+#if CONFIG_VP9_HIGHBITDEPTH

+static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride,

+                                    const uint8_t *b8, int b_stride,

+                                    int width, int height,

+                                    unsigned int input_shift) {

+  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);

+  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);

+  int64_t total_sse = 0;

+  int x, y;

+  for (y = 0; y < height; ++y) {

+    for (x = 0; x < width; ++x) {

+      int64_t diff;

+      diff = (a[x] >> input_shift) - (b[x] >> input_shift);

+      total_sse += diff * diff;

+    }

+    a += a_stride;

+    b += b_stride;

+  }

+  return total_sse;

+}

+static int64_t highbd_get_sse(const uint8_t *a, int a_stride,

+                              const uint8_t *b, int b_stride,

+                              int width, int height) {

+  int64_t total_sse = 0;

+  int x, y;

+  const int dw = width % 16;

+  const int dh = height % 16;

+  unsigned int sse = 0;

+  int sum = 0;

+  if (dw > 0) {

+    encoder_highbd_8_variance(&a[width - dw], a_stride,

+      &b[width - dw], b_stride,

+      dw, height, &sse, &sum);

+    total_sse += sse;

+  }

+  if (dh > 0) {

+    encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,

+      &b[(height - dh) * b_stride], b_stride,

+      width - dw, dh, &sse, &sum);

+    total_sse += sse;

+  }

+  for (y = 0; y < height / 16; ++y) {

+    const uint8_t *pa = a;

+    const uint8_t *pb = b;

+    for (x = 0; x < width / 16; ++x) {

+      vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);

+      total_sse += sse;

+      pa += 16;

+      pb += 16;

+    }

+    a += 16 * a_stride;

+    b += 16 * b_stride;

+  }

+  return total_sse;

+}

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a,

+  const YV12_BUFFER_CONFIG *b) {

+  assert(a->y_crop_width == b->y_crop_width);

+  assert(a->y_crop_height == b->y_crop_height);

+  return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,

+    a->y_crop_width, a->y_crop_height);

+}

+#if CONFIG_VP9_HIGHBITDEPTH

+int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,

+  const YV12_BUFFER_CONFIG *b) {

+  assert(a->y_crop_width == b->y_crop_width);

+  assert(a->y_crop_height == b->y_crop_height);

+  assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);

+  assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);

+  return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,

+    a->y_crop_width, a->y_crop_height);

+}

+#endif  // CONFIG_VP9_HIGHBITDEPTH

+#if CONFIG_VP9_HIGHBITDEPTH

+void vpx_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,

+                          const YV12_BUFFER_CONFIG *b,

+                          PSNR_STATS *psnr, uint32_t bit_depth,

+                          uint32_t in_bit_depth) {

+  const int widths[3] =

+  { a->y_crop_width, a->uv_crop_width, a->uv_crop_width };

+  const int heights[3] =

+  { a->y_crop_height, a->uv_crop_height, a->uv_crop_height };

+  const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };

+  const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };

+  const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };

+  const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };

+  int i;

+  uint64_t total_sse = 0;

+  uint32_t total_samples = 0;

+  const double peak = (double)((1 << in_bit_depth) - 1);

+  const unsigned int input_shift = bit_depth - in_bit_depth;

+  for (i = 0; i < 3; ++i) {

+    const int w = widths[i];

+    const int h = heights[i];

+    const uint32_t samples = w * h;

+    uint64_t sse;

+    if (a->flags & YV12_FLAG_HIGHBITDEPTH) {

+      if (input_shift) {

+        sse = highbd_get_sse_shift(a_planes[i], a_strides[i],

+          b_planes[i], b_strides[i], w, h,

+          input_shift);

+      } else {

+        sse = highbd_get_sse(a_planes[i], a_strides[i],

+          b_planes[i], b_strides[i], w, h);

+      }

+    } else {

+      sse = get_sse(a_planes[i], a_strides[i],

+        b_planes[i], b_strides[i],

+        w, h);

+    }

+    psnr->sse[1 + i] = sse;

+    psnr->samples[1 + i] = samples;

+    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);

+    total_sse += sse;

+    total_samples += samples;

+  }

+  psnr->sse[0] = total_sse;

+  psnr->samples[0] = total_samples;

+  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,

+    (double)total_sse);

+}

+#endif  // !CONFIG_VP9_HIGHBITDEPTH

+void vpx_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,

+                   PSNR_STATS *psnr) {

+  static const double peak = 255.0;

+  const int widths[3] = {

+    a->y_crop_width, a->uv_crop_width, a->uv_crop_width };

+  const int heights[3] = {

+    a->y_crop_height, a->uv_crop_height, a->uv_crop_height };

+  const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer };

+  const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride };

+  const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer };

+  const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride };

+  int i;

+  uint64_t total_sse = 0;

+  uint32_t total_samples = 0;

+  for (i = 0; i < 3; ++i) {

+    const int w = widths[i];

+    const int h = heights[i];

+    const uint32_t samples = w * h;

+    const uint64_t sse = get_sse(a_planes[i], a_strides[i],

+      b_planes[i], b_strides[i],

+      w, h);

+    psnr->sse[1 + i] = sse;

+    psnr->samples[1 + i] = samples;

+    psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse);

+    total_sse += sse;

+    total_samples += samples;

+  }

+  psnr->sse[0] = total_sse;

+  psnr->samples[0] = total_samples;

+  psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak,

+    (double)total_sse);

+}

--- /dev/null

+++ b/vpx_dsp/psnr.h

@@ -1,0 +1,63 @@

+/*

+*  Copyright (c) 2016 The WebM project authors. All Rights Reserved.

+*

+*  Use of this source code is governed by a BSD-style license

+*  that can be found in the LICENSE file in the root of the source

+*  tree. An additional intellectual property rights grant can be found

+*  in the file PATENTS.  All contributing project authors may

+*  be found in the AUTHORS file in the root of the source tree.

+*/

+#ifndef VPX_DSP_PSNR_H_

+#define VPX_DSP_PSNR_H_

+#include "vpx_scale/yv12config.h"

+#define MAX_PSNR 100.0

+#ifdef __cplusplus

+extern "C" {

+#endif

+typedef struct {

+  double psnr[4];       // total/y/u/v

+  uint64_t sse[4];      // total/y/u/v

+  uint32_t samples[4];  // total/y/u/v

+} PSNR_STATS;

+// TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t

+/*!\brief Converts SSE to PSNR

+*

+* Converts sum of squared errros (SSE) to peak signal-to-noise ratio (PNSR).

+*

+* \param[in]    samples       Number of samples

+* \param[in]    peak          Max sample value

+* \param[in]    sse           Sum of squared errors

+*/

+double vpx_sse_to_psnr(double samples, double peak, double sse);

+int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a,

+                      const YV12_BUFFER_CONFIG *b);

+#if CONFIG_VP9_HIGHBITDEPTH

+int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,

+                             const YV12_BUFFER_CONFIG *b);

+void vpx_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a,

+                      const YV12_BUFFER_CONFIG *b,

+                      PSNR_STATS *psnr,

+                      unsigned int bit_depth,

+                      unsigned int in_bit_depth);

+#endif

+void vpx_calc_psnr(const YV12_BUFFER_CONFIG *a,

+               const YV12_BUFFER_CONFIG *b,

+               PSNR_STATS *psnr);

+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,

+                   const YV12_BUFFER_CONFIG *dest,

+                   double *phvs_y, double *phvs_u,

+                   double *phvs_v, uint32_t bd, uint32_t in_bd);

+#ifdef __cplusplus

+}  // extern "C"

+#endif

+#endif  // VPX_DSP_PSNR_H_

--- a/vpx_dsp/psnrhvs.c

+++ b/vpx_dsp/psnrhvs.c

@@ -10,6 +10,7 @@

  *  This code was originally written by: Gregory Maxwell, at the Daala

  *  project.

*/

+#include <assert.h>

 #include <stdio.h>

 #include <stdlib.h>

 #include <math.h>

@@ -18,6 +19,7 @@

 #include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/ssim.h"

 #include "vpx_ports/system_state.h"

+#include "vpx_dsp/psnr.h"

 #if !defined(M_PI)

 # define M_PI (3.141592653589793238462643)

@@ -26,14 +28,29 @@

 static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,

                            int xstride) {

+  int i, j;

   (void) xstride;

   vpx_fdct8x8(x, y, ystride);

+  for (i = 0; i < 8; i++)

+    for (j = 0; j< 8; j++)

+      *(y + ystride*i + j) = (*(y + ystride*i + j) + 4) >> 3;

+#if CONFIG_VP9_HIGHBITDEPTH

+static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,

+                           int xstride) {

+  int i, j;

+  (void) xstride;

+  vpx_highbd_fdct8x8(x, y, ystride);

+  for (i = 0; i < 8; i++)

+    for (j = 0; j< 8; j++)

+      *(y + ystride*i + j) = (*(y + ystride*i + j) + 4) >> 3;

+}

+#endif

 /* Normalized inverse quantization matrix for 8x8 DCT at the point of

  * transparency. This is not the JPEG based matrix from the paper,

  this one gives a slightly higher MOS agreement.*/

-static const float csf_y[8][8] = {

+static const double csf_y[8][8] = {

     {1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,

      0.678296995242, 0.466224900598, 0.3265091542},

     {2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,

@@ -50,7 +67,7 @@

      0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001},

     {0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,

      0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276}};

-static const float csf_cb420[8][8] = {

+static const double csf_cb420[8][8] = {

     {1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,

      0.898018824055, 0.74725392039, 0.615105596242},

     {2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,

@@ -67,7 +84,7 @@

      0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733},

     {0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,

      0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237}};

-static const float csf_cr420[8][8] = {

+static const double csf_cr420[8][8] = {

     {2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,

      0.867069376285, 0.721500455585, 0.593906509971},

     {2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,

@@ -85,23 +102,38 @@

     {0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,

      0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658}};

-static double convert_score_db(double _score, double _weight) {

-  return 10 * (log10(255 * 255) - log10(_weight * _score));

+static double convert_score_db(double _score, double _weight, int bit_depth) {

+  int16_t pix_max = 255;

+  assert(_score * _weight >= 0.0);

+  if (bit_depth == 10)

+    pix_max = 1023;

+  else if (bit_depth == 12)

+    pix_max = 4095;

+  if (_weight * _score < pix_max * pix_max * 1e-10)

+    return MAX_PSNR;

+  return 10 * (log10(pix_max * pix_max) - log10(_weight * _score));

-static double calc_psnrhvs(const unsigned char *_src, int _systride,

-                           const unsigned char *_dst, int _dystride,

-                           double _par, int _w, int _h, int _step,

-                           const float _csf[8][8]) {

-  float ret;

+static double calc_psnrhvs(const unsigned char *src, int _systride,

+                               const unsigned char *dst, int _dystride,

+                               double _par, int _w, int _h, int _step,

+                               const double _csf[8][8], uint32_t bit_depth,

+                               uint32_t _shift) {

+  double ret;

+  const uint8_t *_src8 = src;

+  const uint8_t *_dst8 = dst;

+  const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);

+  const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);

   int16_t dct_s[8 * 8], dct_d[8 * 8];

   tran_low_t dct_s_coef[8 * 8], dct_d_coef[8 * 8];

-  float mask[8][8];

+  double mask[8][8];

   int pixels;

   int x;

   int y;

   (void) _par;

   ret = pixels = 0;

   /*In the PSNR-HVS-M paper[1] the authors describe the construction of

    their masking table as "we have used the quantization table for the

    color component Y of JPEG [6] that has been also obtained on the

@@ -126,23 +158,28 @@

     for (x = 0; x < _w - 7; x += _step) {

       int i;

       int j;

-      float s_means[4];

-      float d_means[4];

-      float s_vars[4];

-      float d_vars[4];

-      float s_gmean = 0;

-      float d_gmean = 0;

-      float s_gvar = 0;

-      float d_gvar = 0;

-      float s_mask = 0;

-      float d_mask = 0;

+      double s_means[4];

+      double d_means[4];

+      double s_vars[4];

+      double d_vars[4];

+      double s_gmean = 0;

+      double d_gmean = 0;

+      double s_gvar = 0;

+      double d_gvar = 0;

+      double s_mask = 0;

+      double d_mask = 0;

       for (i = 0; i < 4; i++)

         s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0;

       for (i = 0; i < 8; i++) {

         for (j = 0; j < 8; j++) {

           int sub = ((i & 12) >> 2) + ((j & 12) >> 1);

-          dct_s[i * 8 + j] = _src[(y + i) * _systride + (j + x)];

-          dct_d[i * 8 + j] = _dst[(y + i) * _dystride + (j + x)];

+          if (bit_depth == 8 && _shift == 0) {

+            dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)];

+            dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)];

+          } else if (bit_depth == 10 || bit_depth == 12) {

+            dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift;

+            dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift;

+          }

           s_gmean += dct_s[i * 8 + j];

           d_gmean += dct_d[i * 8 + j];

           s_means[sub] += dct_s[i * 8 + j];

@@ -176,8 +213,16 @@

         s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;

       if (d_gvar > 0)

         d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;

-      od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);

-      od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);

+#if CONFIG_VP9_HIGHBITDEPTH

+      if (bit_depth == 10 || bit_depth == 12) {

+        hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);

+        hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);

+      }

+#endif

+      if (bit_depth == 8) {

+        od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);

+        od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);

+      }

       for (i = 0; i < 8; i++)

         for (j = (i == 0); j < 8; j++)

           s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];

@@ -190,8 +235,8 @@

         s_mask = d_mask;

       for (i = 0; i < 8; i++) {

         for (j = 0; j < 8; j++) {

-          float err;

-          err = fabs((float)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));

+          double err;

+          err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));

           if (i != 0 || j != 0)

             err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];

           ret += (err * _csf[i][j]) * (err * _csf[i][j]);

@@ -203,25 +248,35 @@

   ret /= pixels;

   return ret;

-double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,

+double vpx_psnrhvs(const YV12_BUFFER_CONFIG *src,

                    const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs,

-                   double *u_psnrhvs, double *v_psnrhvs) {

+                   double *u_psnrhvs, double *v_psnrhvs,

+                   uint32_t bd, uint32_t in_bd) {

   double psnrhvs;

   const double par = 1.0;

   const int step = 7;

+  uint32_t bd_shift = 0;

   vpx_clear_system_state();

-  *y_psnrhvs = calc_psnrhvs(source->y_buffer, source->y_stride, dest->y_buffer,

-                            dest->y_stride, par, source->y_crop_width,

-                            source->y_crop_height, step, csf_y);

-  *u_psnrhvs = calc_psnrhvs(source->u_buffer, source->uv_stride, dest->u_buffer,

-                            dest->uv_stride, par, source->uv_crop_width,

-                            source->uv_crop_height, step, csf_cb420);

+  assert(bd == 8 || bd == 10 || bd == 12);

+  assert(bd >= in_bd);

-  *v_psnrhvs = calc_psnrhvs(source->v_buffer, source->uv_stride, dest->v_buffer,

-                            dest->uv_stride, par, source->uv_crop_width,

-                            source->uv_crop_height, step, csf_cr420);

-  psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));

+  bd_shift = bd - in_bd;

-  return convert_score_db(psnrhvs, 1.0);

+  *y_psnrhvs = calc_psnrhvs(src->y_buffer, src->y_stride, dest->y_buffer,

+                            dest->y_stride, par, src->y_crop_width,

+                            src->y_crop_height, step, csf_y, bd,

+                            bd_shift);

+  *u_psnrhvs = calc_psnrhvs(src->u_buffer, src->uv_stride, dest->u_buffer,

+                            dest->uv_stride, par, src->uv_crop_width,

+                            src->uv_crop_height, step, csf_cb420, bd,

+                            bd_shift);

+  *v_psnrhvs = calc_psnrhvs(src->v_buffer, src->uv_stride, dest->v_buffer,

+                            dest->uv_stride, par, src->uv_crop_width,

+                            src->uv_crop_height, step, csf_cr420, bd,

+                            bd_shift);

+  psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));

+  return convert_score_db(psnrhvs, 1.0, in_bd);

--- a/vpx_dsp/ssim.c

+++ b/vpx_dsp/ssim.c

@@ -8,6 +8,7 @@

  *  be found in the AUTHORS file in the root of the source tree.

*/

+#include <assert.h>

 #include <math.h>

 #include "./vpx_dsp_rtcd.h"

 #include "vpx_dsp/ssim.h"

@@ -66,17 +67,32 @@

 static const int64_t cc1 =  26634;  // (64^2*(.01*255)^2

 static const int64_t cc2 = 239708;  // (64^2*(.03*255)^2

+static const int64_t cc1_10 = 428658;  // (64^2*(.01*1023)^2

+static const int64_t cc2_10 = 3857925;  // (64^2*(.03*1023)^2

+static const int64_t cc1_12 = 6868593;  // (64^2*(.01*4095)^2

+static const int64_t cc2_12 = 61817334;  // (64^2*(.03*4095)^2

 static double similarity(uint32_t sum_s, uint32_t sum_r,

                          uint32_t sum_sq_s, uint32_t sum_sq_r,

-                         uint32_t sum_sxr, int count) {

+                         uint32_t sum_sxr, int count,

+                         uint32_t bd) {

   int64_t ssim_n, ssim_d;

   int64_t c1, c2;

+  if (bd == 8) {

+    // scale the constants by number of pixels

+    c1 = (cc1 * count * count) >> 12;

+    c2 = (cc2 * count * count) >> 12;

+  } else if (bd == 10) {

+    c1 = (cc1_10 * count * count) >> 12;

+    c2 = (cc2_10 * count * count) >> 12;

+  } else if (bd == 12) {

+    c1 = (cc1_12 * count * count) >> 12;

+    c2 = (cc2_12 * count * count) >> 12;

+  } else {

+    c1 = c2 = 0;

+    assert(0);

+  }

-  // scale the constants by number of pixels

-  c1 = (cc1 * count * count) >> 12;

-  c2 = (cc2 * count * count) >> 12;

   ssim_n = (2 * sum_s * sum_r + c1) * ((int64_t) 2 * count * sum_sxr -

                                        (int64_t) 2 * sum_s * sum_r + c2);

@@ -91,22 +107,21 @@

   uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

   vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,

                      &sum_sxr);

-  return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);

+  return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8);

 #if CONFIG_VP9_HIGHBITDEPTH

 static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r,

-                              int rp, unsigned int bd) {

+                              int rp, uint32_t bd, uint32_t shift) {

   uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;

-  const int oshift = bd - 8;

   vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,

                             &sum_sxr);

-  return similarity(sum_s >> oshift,

-                    sum_r >> oshift,

-                    sum_sq_s >> (2 * oshift),

-                    sum_sq_r >> (2 * oshift),

-                    sum_sxr >> (2 * oshift),

-                    64);

+  return similarity(sum_s >> shift,

+                    sum_r >> shift,

+                    sum_sq_s >> (2 * shift),

+                    sum_sq_r >> (2 * shift),

+                    sum_sxr >> (2 * shift),

+                    64, bd);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -136,7 +151,7 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 static double vpx_highbd_ssim2(const uint8_t *img1, const uint8_t *img2,

                                int stride_img1, int stride_img2, int width,

-                               int height, unsigned int bd) {

+                               int height, uint32_t bd, uint32_t shift) {

   int i, j;

   int samples = 0;

   double ssim_total = 0;

@@ -147,7 +162,7 @@

     for (j = 0; j <= width - 8; j += 4) {

       double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1,

                                  CONVERT_TO_SHORTPTR(img2 + j), stride_img2,

-                                 bd);

+                                 bd, shift);

       ssim_total += v;

       samples++;

@@ -182,31 +197,6 @@

   return ssimv;

-double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,

-                      const YV12_BUFFER_CONFIG *dest,

-                      double *ssim_y, double *ssim_u, double *ssim_v) {

-  double ssim_all = 0;

-  double a, b, c;

-  a = vpx_ssim2(source->y_buffer, dest->y_buffer,

-                source->y_stride, dest->y_stride,

-                source->y_crop_width, source->y_crop_height);

-  b = vpx_ssim2(source->u_buffer, dest->u_buffer,

-                source->uv_stride, dest->uv_stride,

-                source->uv_crop_width, source->uv_crop_height);

-  c = vpx_ssim2(source->v_buffer, dest->v_buffer,

-                source->uv_stride, dest->uv_stride,

-                source->uv_crop_width, source->uv_crop_height);

-  *ssim_y = a;

-  *ssim_u = b;

-  *ssim_v = c;

-  ssim_all = (a * 4 + b + c) / 6;

-  return ssim_all;

-}

 // traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity

//

 // Re working out the math ->

@@ -455,21 +445,28 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,

                             const YV12_BUFFER_CONFIG *dest,

-                            double *weight, unsigned int bd) {

+                            double *weight, uint32_t bd, uint32_t in_bd) {

   double a, b, c;

   double ssimv;

+  uint32_t shift = 0;

+  assert(bd >= in_bd);

+  shift = bd - in_bd;

   a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,

                        source->y_stride, dest->y_stride,

-                       source->y_crop_width, source->y_crop_height, bd);

+                       source->y_crop_width, source->y_crop_height,

+                       in_bd, shift);

   b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,

                        source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height, bd);

+                       source->uv_crop_width, source->uv_crop_height,

+                       in_bd, shift);

   c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,

                        source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height, bd);

+                       source->uv_crop_width, source->uv_crop_height,

+                       in_bd, shift);

   ssimv = a * .8 + .1 * (b + c);

@@ -478,28 +475,4 @@

   return ssimv;

-double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,

-                             const YV12_BUFFER_CONFIG *dest, double *ssim_y,

-                             double *ssim_u, double *ssim_v, unsigned int bd) {

-  double ssim_all = 0;

-  double a, b, c;

-  a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer,

-                       source->y_stride, dest->y_stride,

-                       source->y_crop_width, source->y_crop_height, bd);

-  b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer,

-                       source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height, bd);

-  c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer,

-                       source->uv_stride, dest->uv_stride,

-                       source->uv_crop_width, source->uv_crop_height, bd);

-  *ssim_y = a;

-  *ssim_u = b;

-  *ssim_v = c;

-  ssim_all = (a * 4 + b + c) / 6;

-  return ssim_all;

-}

 #endif  // CONFIG_VP9_HIGHBITDEPTH

--- a/vpx_dsp/ssim.h

+++ b/vpx_dsp/ssim.h

@@ -11,6 +11,8 @@

 #ifndef VPX_DSP_SSIM_H_

 #define VPX_DSP_SSIM_H_

+#define MAX_SSIM_DB 100.0;

 #ifdef __cplusplus

 extern "C" {

 #endif

@@ -68,30 +70,16 @@

                      const YV12_BUFFER_CONFIG *dest,

                      double *weight);

-double vpx_calc_ssimg(const YV12_BUFFER_CONFIG *source,

-                      const YV12_BUFFER_CONFIG *dest,

-                      double *ssim_y, double *ssim_u, double *ssim_v);

 double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,

                          const YV12_BUFFER_CONFIG *dest,

-                         double *ssim_y, double *ssim_u, double *ssim_v);

+                         double *ssim_y, double *ssim_u,

+                         double *ssim_v, uint32_t bd, uint32_t in_bd);

-double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source,

-                   const YV12_BUFFER_CONFIG *dest,

-                   double *ssim_y, double *ssim_u, double *ssim_v);

 #if CONFIG_VP9_HIGHBITDEPTH

 double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,

                             const YV12_BUFFER_CONFIG *dest,

                             double *weight,

-                            unsigned int bd);

-double vpx_highbd_calc_ssimg(const YV12_BUFFER_CONFIG *source,

-                             const YV12_BUFFER_CONFIG *dest,

-                             double *ssim_y,

-                             double *ssim_u,

-                             double *ssim_v,

-                             unsigned int bd);

+                            uint32_t bd, uint32_t in_bd);

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #ifdef __cplusplus

--- a/vpx_dsp/vpx_dsp.mk

+++ b/vpx_dsp/vpx_dsp.mk

@@ -22,6 +22,8 @@

 DSP_SRCS-yes += bitwriter.c

 DSP_SRCS-yes += bitwriter_buffer.c

 DSP_SRCS-yes += bitwriter_buffer.h

+DSP_SRCS-yes += psnr.c

+DSP_SRCS-yes += psnr.h

 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c

 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h

 DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c

--- a/vpx_ports/mem.h

+++ b/vpx_ports/mem.h

@@ -45,9 +45,9 @@

 #define ALIGN_POWER_OF_TWO(value, n) \

     (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))

+#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1))

 #if CONFIG_VP9_HIGHBITDEPTH

-#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1))

-#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1))

+#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)(x)) >> 1))

 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #endif  // VPX_PORTS_MEM_H_

--

⑨