shithub: libvpx

Download patch

ref: f42e41f2eff366338f8f7b36d5b6f8c9c5a26573
parent: 25b609b62bbb3ee6b8b4c303675a6ff54484739a
parent: 510557e2eb058c7f939a7109eaaf6b03d9054390
author: Jim Bankoski <jimbankoski@google.com>
date: Sat Nov 24 16:38:36 EST 2012

Merge "removed the idct rtcd idct calls" into experimental

--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -15,7 +15,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 extern "C" {
-#include "vp9/common/idct.h"
 #include "vp9_rtcd.h"
 }
 
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -15,7 +15,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 extern "C" {
-#include "vp9/common/idct.h"
 #include "vp9_rtcd.h"
 }
 
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -15,7 +15,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 
 extern "C" {
-#include "vp9/common/idct.h"
 #include "vp9_rtcd.h"
 }
 
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -12,12 +12,7 @@
 #include "subpixel.h"
 #include "loopfilter.h"
 #include "recon.h"
-#include "idct.h"
 #include "onyxc_int.h"
-
-void (*vp8_short_idct4x4)(short *input, short *output, int pitch);
-void (*vp8_short_idct4x4_1)(short *input, short *output, int pitch);
-void (*vp8_dc_only_idct)(short input_dc, short *output, int pitch);
 
 extern void (*vp8_post_proc_down_and_across_mb_row)(
     unsigned char *src_ptr,
--- a/vp9/common/arm/arm_systemdependent.c
+++ b/vp9/common/arm/arm_systemdependent.c
@@ -15,7 +15,6 @@
 #include "vp9/common/subpixel.h"
 #include "vp9/common/loopfilter.h"
 #include "vp9/common/recon.h"
-#include "vp9/common/idct.h"
 #include "vp9/common/onyxc_int.h"
 
 void vp9_arch_arm_common_init(VP9_COMMON *ctx) {
--- a/vp9/common/blockd.h
+++ b/vp9/common/blockd.h
@@ -145,6 +145,11 @@
 
 #define VP9_MVREFS (1 + SPLITMV - NEARESTMV)
 
+#if CONFIG_LOSSLESS
+#define WHT_UPSCALE_FACTOR 3
+#define Y2_WHT_UPSCALE_FACTOR 2
+#endif
+
 typedef enum {
   B_DC_PRED,          /* average of above and left pixels */
   B_TM_PRED,
@@ -370,6 +375,14 @@
 
   unsigned int frames_since_golden;
   unsigned int frames_till_alt_ref_frame;
+
+  /* Inverse transform function pointers. */
+  void (*inv_xform4x4_1_x8)(short *input, short *output, int pitch);
+  void (*inv_xform4x4_x8)(short *input, short *output, int pitch);
+  void (*inv_walsh4x4_1)(short *in, short *out);
+  void (*inv_walsh4x4_lossless)(short *in, short *out);
+
+
   vp9_subpix_fn_t  subpixel_predict;
   vp9_subpix_fn_t  subpixel_predict8x4;
   vp9_subpix_fn_t  subpixel_predict8x8;
--- a/vp9/common/generic/systemdependent.c
+++ b/vp9/common/generic/systemdependent.c
@@ -13,7 +13,6 @@
 #include "vp9_rtcd.h"
 #include "vp9/common/subpixel.h"
 #include "vp9/common/loopfilter.h"
-#include "vp9/common/idct.h"
 #include "vp9/common/onyxc_int.h"
 
 extern void vp9_arch_x86_common_init(VP9_COMMON *ctx);
@@ -22,18 +21,6 @@
 void vp9_machine_specific_config(VP9_COMMON *ctx) {
 #if CONFIG_RUNTIME_CPU_DETECT
   VP9_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-  rtcd->idct.idct1        = vp9_short_idct4x4llm_1_c;
-  rtcd->idct.idct16       = vp9_short_idct4x4llm_c;
-  rtcd->idct.idct1_scalar_add = vp9_dc_only_idct_add_c;
-  rtcd->idct.iwalsh1      = vp9_short_inv_walsh4x4_1_c;
-  rtcd->idct.iwalsh16     = vp9_short_inv_walsh4x4_c;
-  rtcd->idct.idct8        = vp9_short_idct8x8_c;
-  rtcd->idct.idct10_8     = vp9_short_idct10_8x8_c;
-  rtcd->idct.idct1_scalar_add_8x8 = vp9_dc_only_idct_add_8x8_c;
-  rtcd->idct.ihaar2       = vp9_short_ihaar2x2_c;
-  rtcd->idct.idct16x16    = vp9_short_idct16x16_c;
-  rtcd->idct.idct10_16x16 = vp9_short_idct10_16x16_c;
 
 #if CONFIG_POSTPROC || (CONFIG_VP9_ENCODER && CONFIG_INTERNAL_STATS)
   rtcd->postproc.down             = vp9_mbpost_proc_down_c;
--- a/vp9/common/idct.h
+++ /dev/null
@@ -1,156 +1,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef __INC_IDCT_H
-#define __INC_IDCT_H
-
-#include "vp9/common/blockd.h"
-
-#define prototype_second_order(sym) \
-  void sym(short *input, short *output)
-
-#define prototype_idct(sym) \
-  void sym(short *input, short *output, int pitch)
-
-#define prototype_idct_scalar_add(sym) \
-  void sym(short input, \
-           unsigned char *pred, unsigned char *output, \
-           int pitch, int stride)
-
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/idct_x86.h"
-#endif
-
-#ifdef _MSC_VER
-/* TODO: remove these after integer implmementations are done */
-#define M_PI       3.14159265358979323846
-#define round(x) (((x)>0)? floor((x)+0.5): ceil((x)-0.5))
-#endif
-
-
-#if ARCH_ARM
-#include "arm/idct_arm.h"
-#endif
-
-#if CONFIG_LOSSLESS
-#define WHT_UPSCALE_FACTOR 3
-#define Y2_WHT_UPSCALE_FACTOR 2
-#endif
-
-#ifndef vp9_idct_idct16x16
-#define vp9_idct_idct16x16 vp9_short_idct16x16_c
-#endif
-extern prototype_idct(vp9_idct_idct16x16);
-
-#ifndef vp9_idct_idct10_16x16
-#define vp9_idct_idct10_16x16 vp9_short_idct10_16x16_c
-#endif
-extern prototype_idct(vp9_idct_idct10_16x16);
-
-#ifndef vp9_idct_idct8
-#define vp9_idct_idct8 vp9_short_idct8x8_c
-#endif
-extern prototype_idct(vp9_idct_idct8);
-
-#ifndef vp9_idct_idct10_8
-#define vp9_idct_idct10_8 vp9_short_idct10_8x8_c
-#endif
-extern prototype_idct(vp9_idct_idct10_8);
-
-#ifndef vp9_idct_idct8_1
-#define vp9_idct_idct8_1 vp9_short_idct8x8_1_c
-#endif
-extern prototype_idct(vp9_idct_idct8_1);
-
-#ifndef vp9_idct_ihaar2
-#define vp9_idct_ihaar2 vp9_short_ihaar2x2_c
-#endif
-extern prototype_idct(vp9_idct_ihaar2);
-
-#ifndef vp9_idct_ihaar2_1
-#define vp9_idct_ihaar2_1 vp9_short_ihaar2x2_1_c
-#endif
-extern prototype_idct(vp9_idct_ihaar2_1);
-
-#ifndef vp9_idct_idct1_scalar_add_8x8
-#define vp9_idct_idct1_scalar_add_8x8 vp9_dc_only_idct_add_8x8_c
-#endif
-extern prototype_idct_scalar_add(vp9_idct_idct1_scalar_add_8x8);
-
-
-
-#ifndef vp9_idct_idct1
-#define vp9_idct_idct1 vp9_short_idct4x4llm_1_c
-#endif
-extern prototype_idct(vp9_idct_idct1);
-
-#ifndef vp9_idct_idct16
-#define vp9_idct_idct16 vp9_short_idct4x4llm_c
-#endif
-extern prototype_idct(vp9_idct_idct16);
-
-#ifndef vp9_idct_idct1_scalar_add
-#define vp9_idct_idct1_scalar_add vp9_dc_only_idct_add_c
-#endif
-extern prototype_idct_scalar_add(vp9_idct_idct1_scalar_add);
-
-
-#ifndef vp9_idct_iwalsh1
-#define vp9_idct_iwalsh1 vp9_short_inv_walsh4x4_1_c
-#endif
-extern prototype_second_order(vp9_idct_iwalsh1);
-
-#ifndef vp9_idct_iwalsh16
-#define vp9_idct_iwalsh16 vp9_short_inv_walsh4x4_c
-#endif
-extern prototype_second_order(vp9_idct_iwalsh16);
-
-#if CONFIG_LOSSLESS
-extern prototype_idct(vp9_short_inv_walsh4x4_x8_c);
-extern prototype_idct(vp9_short_inv_walsh4x4_1_x8_c);
-extern prototype_idct_scalar_add(vp9_dc_only_inv_walsh_add_c);
-extern prototype_second_order(vp9_short_inv_walsh4x4_lossless_c);
-extern prototype_second_order(vp9_short_inv_walsh4x4_1_lossless_c);
-#endif
-
-void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
-                  TX_TYPE tx_type, int tx_dim);
-
-typedef prototype_idct((*vp9_idct_fn_t));
-typedef prototype_idct_scalar_add((*vp9_idct_scalar_add_fn_t));
-typedef prototype_second_order((*vp9_second_order_fn_t));
-
-typedef struct {
-  vp9_idct_fn_t            idct1;
-  vp9_idct_fn_t            idct16;
-  vp9_idct_scalar_add_fn_t idct1_scalar_add;
-
-  vp9_second_order_fn_t iwalsh1;
-  vp9_second_order_fn_t iwalsh16;
-
-  vp9_idct_fn_t            idct8;
-  vp9_idct_fn_t            idct10_8;
-  vp9_idct_fn_t            idct8_1;
-  vp9_idct_scalar_add_fn_t idct1_scalar_add_8x8;
-  vp9_idct_fn_t ihaar2;
-  vp9_idct_fn_t ihaar2_1;
-
-  vp9_idct_fn_t            idct16x16;
-  vp9_idct_fn_t            idct10_16x16;
-} vp9_idct_rtcd_vtable_t;
-
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IDCT_INVOKE(ctx,fn) (ctx)->fn
-#else
-#define IDCT_INVOKE(ctx,fn) vp9_idct_##fn
-#endif
-
-#endif
--- a/vp9/common/idctllm.c
+++ b/vp9/common/idctllm.c
@@ -25,7 +25,6 @@
 #include <assert.h>
 #include <math.h>
 #include "vpx_ports/config.h"
-#include "vp9/common/idct.h"
 #include "vp9/common/systemdependent.h"
 
 #include "vp9/common/blockd.h"
--- a/vp9/common/invtrans.c
+++ b/vp9/common/invtrans.c
@@ -9,6 +9,7 @@
  */
 
 #include "invtrans.h"
+#include "./vp9_rtcd.h"
 
 static void recon_dcblock(MACROBLOCKD *xd) {
   BLOCKD *b = &xd->block[24];
@@ -28,108 +29,92 @@
   xd->block[12].dqcoeff[0] = b->diff[8];
 }
 
-void vp9_inverse_transform_b_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                 BLOCKD *b, int pitch) {
+void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
+  BLOCKD *b = &xd->block[block];
   if (b->eob <= 1)
-    IDCT_INVOKE(rtcd, idct1)(b->dqcoeff, b->diff, pitch);
+    xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
   else
-    IDCT_INVOKE(rtcd, idct16)(b->dqcoeff, b->diff, pitch);
+    xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
 }
 
-void vp9_inverse_transform_mby_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                   MACROBLOCKD *xd) {
+void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
   int i;
   BLOCKD *blockd = xd->block;
 
   if (xd->mode_info_context->mbmi.mode != SPLITMV) {
     /* do 2nd order transform on the dc block */
-    IDCT_INVOKE(rtcd, iwalsh16)(blockd[24].dqcoeff, blockd[24].diff);
+    vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff);
     recon_dcblock(xd);
   }
 
   for (i = 0; i < 16; i++) {
-    vp9_inverse_transform_b_4x4(rtcd, &blockd[i], 32);
+    vp9_inverse_transform_b_4x4(xd, i, 32);
   }
 }
 
-void vp9_inverse_transform_mbuv_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                    MACROBLOCKD *xd) {
+void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) {
   int i;
-  BLOCKD *blockd = xd->block;
-
   for (i = 16; i < 24; i++) {
-    vp9_inverse_transform_b_4x4(rtcd, &blockd[i], 16);
+    vp9_inverse_transform_b_4x4(xd, i, 16);
   }
 }
 
-void vp9_inverse_transform_mb_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                  MACROBLOCKD *xd) {
-  vp9_inverse_transform_mby_4x4(rtcd, xd);
-  vp9_inverse_transform_mbuv_4x4(rtcd, xd);
+void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd) {
+  vp9_inverse_transform_mby_4x4(xd);
+  vp9_inverse_transform_mbuv_4x4(xd);
 }
 
-void vp9_inverse_transform_b_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                 short *input_dqcoeff, short *output_coeff,
+void vp9_inverse_transform_b_8x8(short *input_dqcoeff, short *output_coeff,
                                  int pitch) {
-  // int b,i;
-  // if (b->eob > 1)
-  IDCT_INVOKE(rtcd, idct8)(input_dqcoeff, output_coeff, pitch);
-  // else
-  // IDCT_INVOKE(rtcd, idct8_1)(b->dqcoeff, b->diff, pitch);//pitch
+  vp9_short_idct8x8(input_dqcoeff, output_coeff, pitch);
 }
 
-void vp9_inverse_transform_mby_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                   MACROBLOCKD *xd) {
+void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
   int i;
   BLOCKD *blockd = xd->block;
 
   if (xd->mode_info_context->mbmi.mode != SPLITMV) {
     // do 2nd order transform on the dc block
-    IDCT_INVOKE(rtcd, ihaar2)(blockd[24].dqcoeff, blockd[24].diff, 8);
+    vp9_short_ihaar2x2(blockd[24].dqcoeff, blockd[24].diff, 8);
     recon_dcblock_8x8(xd); // need to change for 8x8
   }
 
   for (i = 0; i < 9; i += 8) {
-    vp9_inverse_transform_b_8x8(rtcd, &blockd[i].dqcoeff[0],
+    vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                 &blockd[i].diff[0], 32);
   }
   for (i = 2; i < 11; i += 8) {
-    vp9_inverse_transform_b_8x8(rtcd, &blockd[i + 2].dqcoeff[0],
+    vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
                                 &blockd[i].diff[0], 32);
   }
 }
 
-void vp9_inverse_transform_mbuv_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                    MACROBLOCKD *xd) {
+void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) {
   int i;
   BLOCKD *blockd = xd->block;
 
   for (i = 16; i < 24; i += 4) {
-    vp9_inverse_transform_b_8x8(rtcd, &blockd[i].dqcoeff[0],
+    vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                 &blockd[i].diff[0], 16);
   }
 }
 
-void vp9_inverse_transform_mb_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                  MACROBLOCKD *xd) {
-  vp9_inverse_transform_mby_8x8(rtcd, xd);
-  vp9_inverse_transform_mbuv_8x8(rtcd, xd);
+void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd) {
+  vp9_inverse_transform_mby_8x8(xd);
+  vp9_inverse_transform_mbuv_8x8(xd);
 }
 
-void vp9_inverse_transform_b_16x16(const vp9_idct_rtcd_vtable_t *rtcd,
-                                   short *input_dqcoeff,
+void vp9_inverse_transform_b_16x16(short *input_dqcoeff,
                                    short *output_coeff, int pitch) {
-  IDCT_INVOKE(rtcd, idct16x16)(input_dqcoeff, output_coeff, pitch);
+  vp9_short_idct16x16(input_dqcoeff, output_coeff, pitch);
 }
 
-void vp9_inverse_transform_mby_16x16(const vp9_idct_rtcd_vtable_t *rtcd,
-                                     MACROBLOCKD *xd) {
-  vp9_inverse_transform_b_16x16(rtcd, &xd->block[0].dqcoeff[0],
+void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
+  vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
                                 &xd->block[0].diff[0], 32);
 }
 
-void vp9_inverse_transform_mb_16x16(const vp9_idct_rtcd_vtable_t *rtcd,
-                                    MACROBLOCKD *xd) {
-  vp9_inverse_transform_mby_16x16(rtcd, xd);
-  vp9_inverse_transform_mbuv_8x8(rtcd, xd);
+void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
+  vp9_inverse_transform_mby_16x16(xd);
+  vp9_inverse_transform_mbuv_8x8(xd);
 }
--- a/vp9/common/invtrans.h
+++ b/vp9/common/invtrans.h
@@ -12,42 +12,30 @@
 #define __INC_INVTRANS_H
 
 #include "vpx_ports/config.h"
-#include "idct.h"
 #include "blockd.h"
 
-extern void vp9_inverse_transform_b_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                        BLOCKD *b, int pitch);
+extern void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch);
 
-extern void vp9_inverse_transform_mb_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                         MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mb_4x4(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_mby_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                          MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_mbuv_4x4(const vp9_idct_rtcd_vtable_t *rtcd,
-                                           MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_b_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                        short *input_dqcoeff,
+extern void vp9_inverse_transform_b_8x8(short *input_dqcoeff,
                                         short *output_coeff, int pitch);
 
-extern void vp9_inverse_transform_mb_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                         MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mb_8x8(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_mby_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                          MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_mbuv_8x8(const vp9_idct_rtcd_vtable_t *rtcd,
-                                           MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_b_16x16(const vp9_idct_rtcd_vtable_t *rtcd,
-                                          short *input_dqcoeff,
+extern void vp9_inverse_transform_b_16x16(short *input_dqcoeff,
                                           short *output_coeff, int pitch);
 
-extern void vp9_inverse_transform_mb_16x16(const vp9_idct_rtcd_vtable_t *rtcd,
-                                           MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
 
-extern void vp9_inverse_transform_mby_16x16(const vp9_idct_rtcd_vtable_t *rtcd,
-                                            MACROBLOCKD *xd);
+extern void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
 
 #endif  // __INC_INVTRANS_H
--- a/vp9/common/onyxc_int.h
+++ b/vp9/common/onyxc_int.h
@@ -19,7 +19,6 @@
 #include "entropymv.h"
 #include "entropy.h"
 #include "entropymode.h"
-#include "idct.h"
 #if CONFIG_POSTPROC
 #include "postproc.h"
 #endif
@@ -148,7 +147,6 @@
 
 typedef struct VP9_COMMON_RTCD {
 #if CONFIG_RUNTIME_CPU_DETECT
-  vp9_idct_rtcd_vtable_t        idct;
 #if CONFIG_POSTPROC
   vp9_postproc_rtcd_vtable_t    postproc;
 #endif
--- a/vp9/common/ppc/systemdependent.c
+++ b/vp9/common/ppc/systemdependent.c
@@ -11,7 +11,6 @@
 #include "subpixel.h"
 #include "loopfilter.h"
 #include "recon.h"
-#include "idct.h"
 #include "onyxc_int.h"
 
 void (*vp8_short_idct4x4)(short *input, short *output, int pitch);
--- a/vp9/common/rtcd_defs.sh
+++ b/vp9/common/rtcd_defs.sh
@@ -227,6 +227,9 @@
 prototype unsigned int vp9_sad3x16 "const unsigned char *src_ptr, int  src_stride, const unsigned char *ref_ptr, int ref_stride, int max_sad"
 specialize vp9_sad3x16 sse2
 
+#
+# Sub Pixel Filters
+#
 prototype void vp9_eighttap_predict16x16 "unsigned char *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, unsigned char *dst_ptr, int  dst_pitch"
 specialize vp9_eighttap_predict16x16
 
@@ -310,6 +313,57 @@
 
 prototype void vp9_bilinear_predict_avg4x4 "unsigned char *src_ptr, int  src_pixels_per_line, int  xoffset, int  yoffset, unsigned char *dst_ptr, int  dst_pitch"
 specialize vp9_bilinear_predict_avg4x4
+
+#
+# dct
+#
+prototype void vp9_short_idct4x4llm_1 "short *input, short *output, int pitch"
+specialize vp9_short_idct4x4llm_1 mmx
+
+prototype void vp9_short_idct4x4llm "short *input, short *output, int pitch"
+specialize vp9_short_idct4x4llm mmx
+
+prototype void vp9_short_idct8x8 "short *input, short *output, int pitch"
+specialize vp9_short_idct8x8
+
+prototype void vp9_short_idct10_8x8 "short *input, short *output, int pitch"
+specialize vp9_short_idct10_8x8
+
+prototype void vp9_short_ihaar2x2 "short *input, short *output, int pitch"
+specialize vp9_short_ihaar2x2
+
+prototype void vp9_short_idct16x16 "short *input, short *output, int pitch"
+specialize vp9_short_idct16x16
+
+prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
+specialize vp9_short_idct10_16x16
+
+#
+# 2nd order
+#
+prototype void vp9_short_inv_walsh4x4_1 "short *in, short *out"
+specialize vp9_short_inv_walsh4x4_1
+
+prototype void vp9_short_inv_walsh4x4 "short *in, short *out"
+specialize vp9_short_inv_walsh4x4
+
+
+# dct and add
+prototype void vp9_dc_only_idct_add_8x8 "short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride"
+specialize vp9_dc_only_idct_add_8x8
+
+prototype void vp9_dc_only_idct_add "short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride"
+specialize vp9_dc_only_idct_add
+
+if [ "$CONFIG_LOSSLESS" = "yes" ]; then
+prototype void vp9_short_inv_walsh4x4_1_x8 "short *input, short *output, int pitch"
+prototype void vp9_short_inv_walsh4x4_x8 "short *input, short *output, int pitch"
+prototype void vp9_dc_only_inv_walsh_add "short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride"
+prototype void vp9_short_inv_walsh4x4_1_lossless "short *in, short *out"
+prototype void vp9_short_inv_walsh4x4_lossless "short *in, short *out"
+fi
+
+
 
 if [ "$CONFIG_SUPERBLOCKS" = "yes" ]; then
 
--- a/vp9/common/x86/x86_systemdependent.c
+++ b/vp9/common/x86/x86_systemdependent.c
@@ -11,7 +11,6 @@
 #include "vpx_config.h"
 #include "vpx_ports/x86.h"
 #include "vp9/common/loopfilter.h"
-#include "vp9/common/idct.h"
 #include "vp9/common/pragmas.h"
 #include "vp9/common/onyxc_int.h"
 
@@ -31,11 +30,6 @@
 #if HAVE_MMX
 // The commented functions need to be re-written for vpx.
   if (flags & HAS_MMX) {
-    rtcd->idct.idct1        = vp9_short_idct4x4llm_1_mmx;
-    rtcd->idct.idct16       = vp9_short_idct4x4llm_mmx;
-    rtcd->idct.idct1_scalar_add = vp9_dc_only_idct_add_mmx;
-    // rtcd->idct.iwalsh16     = vp9_short_inv_walsh4x4_mmx;
-    // rtcd->idct.iwalsh1     = vp9_short_inv_walsh4x4_1_mmx;
 
 #if CONFIG_POSTPROC
     rtcd->postproc.down        = vp9_mbpost_proc_down_mmx;
--- a/vp9/decoder/arm/armv6/idct_blk_v6.c
+++ b/vp9/decoder/arm/armv6/idct_blk_v6.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_ports/config.h"
-#include "vp9/common/idct.h"
+#include "vp9/common/blockd.h"
 #include "vp9/decoder/dequantize.h"
 
 void vp8_dequant_dc_idct_add_y_block_v6(short *q, short *dq,
--- a/vp9/decoder/arm/dequantize_arm.c
+++ b/vp9/decoder/arm/dequantize_arm.c
@@ -11,7 +11,7 @@
 
 #include "vpx_ports/config.h"
 #include "vp9/decoder/dequantize.h"
-#include "vp9/common/idct.h"
+#include "vp9/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
 #if HAVE_ARMV7
--- a/vp9/decoder/arm/neon/idct_blk_neon.c
+++ b/vp9/decoder/arm/neon/idct_blk_neon.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_ports/config.h"
-#include "vp9/common/idct.h"
+#include "vp9/common/blockd.h"
 #include "vp9/decoder/dequantize.h"
 
 /* place these declarations here because we don't want to maintain them
--- a/vp9/decoder/decodframe.c
+++ b/vp9/decoder/decodframe.c
@@ -27,7 +27,6 @@
 #include "vp9/common/extend.h"
 #include "vp9/common/modecont.h"
 #include "vpx_mem/vpx_mem.h"
-#include "vp9/common/idct.h"
 #include "dboolhuff.h"
 
 #include "vp9/common/seg_common.h"
@@ -122,11 +121,10 @@
 
 #if CONFIG_LOSSLESS
   if (!QIndex) {
-    pbi->common.rtcd.idct.idct1        = vp9_short_inv_walsh4x4_1_x8_c;
-    pbi->common.rtcd.idct.idct16       = vp9_short_inv_walsh4x4_x8_c;
-    pbi->common.rtcd.idct.idct1_scalar_add  = vp9_dc_only_inv_walsh_add_c;
-    pbi->common.rtcd.idct.iwalsh1      = vp9_short_inv_walsh4x4_1_lossless_c;
-    pbi->common.rtcd.idct.iwalsh16     = vp9_short_inv_walsh4x4_lossless_c;
+    pbi->mb.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
+    pbi->mb.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
+    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
+    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
     pbi->idct_add            = vp9_dequant_idct_add_lossless_c;
     pbi->dc_idct_add         = vp9_dequant_dc_idct_add_lossless_c;
     pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
@@ -133,11 +131,10 @@
     pbi->idct_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
     pbi->idct_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
   } else {
-    pbi->common.rtcd.idct.idct1        = vp9_short_idct4x4llm_1_c;
-    pbi->common.rtcd.idct.idct16       = vp9_short_idct4x4llm_c;
-    pbi->common.rtcd.idct.idct1_scalar_add  = vp9_dc_only_idct_add_c;
-    pbi->common.rtcd.idct.iwalsh1      = vp9_short_inv_walsh4x4_1_c;
-    pbi->common.rtcd.idct.iwalsh16     = vp9_short_inv_walsh4x4_c;
+    pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
+    pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
+    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
+    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
     pbi->idct_add            = vp9_dequant_idct_add;
     pbi->dc_idct_add         = vp9_dequant_dc_idct_add;
     pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
@@ -145,6 +142,10 @@
     pbi->idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
   }
 #else
+  pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
+  pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
+  pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
+  pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
   pbi->idct_add            = vp9_dequant_idct_add;
   pbi->dc_idct_add         = vp9_dequant_dc_idct_add;
   pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
@@ -305,7 +306,7 @@
           xd->dst.uv_stride, xd->eobs + 16, xd);
     } else if (tx_size == TX_8X8) {
       vp9_dequantize_b_2x2(b);
-      IDCT_INVOKE(RTCD_VTABLE(idct), ihaar2)(&b->dqcoeff[0], b->diff, 8);
+      vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
       ((int *)b->qcoeff)[0] = 0;  // 2nd order block are set to 0 after idct
       ((int *)b->qcoeff)[1] = 0;
       ((int *)b->qcoeff)[2] = 0;
@@ -326,7 +327,7 @@
     } else {
       vp9_dequantize_b(b);
       if (xd->eobs[24] > 1) {
-        IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+        vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
         ((int *)b->qcoeff)[1] = 0;
         ((int *)b->qcoeff)[2] = 0;
@@ -336,7 +337,7 @@
         ((int *)b->qcoeff)[6] = 0;
         ((int *)b->qcoeff)[7] = 0;
       } else {
-        IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+        xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
       }
 
@@ -529,12 +530,12 @@
                                         xd->dst.y_buffer, 16, xd->dst.y_stride);
       } else {
         vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
-                                     xd->predictor, xd->dst.y_buffer,
-                                     16, xd->dst.y_stride, xd->eobs[0]);
+                                   xd->predictor, xd->dst.y_buffer,
+                                   16, xd->dst.y_stride, xd->eobs[0]);
       }
     } else if (tx_size == TX_8X8) {
       vp9_dequantize_b_2x2(b);
-      IDCT_INVOKE(RTCD_VTABLE(idct), ihaar2)(&b->dqcoeff[0], b->diff, 8);
+      vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8);
       ((int *)b->qcoeff)[0] = 0;  // 2nd order block are set to 0 after idct
       ((int *)b->qcoeff)[1] = 0;
       ((int *)b->qcoeff)[2] = 0;
@@ -543,13 +544,13 @@
       ((int *)b->qcoeff)[5] = 0;
       ((int *)b->qcoeff)[6] = 0;
       ((int *)b->qcoeff)[7] = 0;
-        vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff,
+      vp9_dequant_dc_idct_add_y_block_8x8(xd->qcoeff,
           xd->block[0].dequant, xd->predictor, xd->dst.y_buffer,
           xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd);
     } else {
       vp9_dequantize_b(b);
       if (xd->eobs[24] > 1) {
-        IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+        vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
         ((int *)b->qcoeff)[1] = 0;
         ((int *)b->qcoeff)[2] = 0;
@@ -559,7 +560,7 @@
         ((int *)b->qcoeff)[6] = 0;
         ((int *)b->qcoeff)[7] = 0;
       } else {
-        IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+        xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
       }
 
--- a/vp9/decoder/dequantize.c
+++ b/vp9/decoder/dequantize.c
@@ -11,7 +11,6 @@
 
 #include "vp9_rtcd.h"
 #include "dequantize.h"
-#include "vp9/common/idct.h"
 #include "vpx_mem/vpx_mem.h"
 #include "onyxd_int.h"
 
--- a/vp9/decoder/idct_blk.c
+++ b/vp9/decoder/idct_blk.c
@@ -9,7 +9,7 @@
  */
 
 #include "vp9_rtcd.h"
-#include "vp9/common/idct.h"
+#include "vp9/common/blockd.h"
 
 void vp9_dequant_dc_idct_add_y_block_c(short *q, const short *dq,
                                        unsigned char *pre,
--- a/vp9/decoder/x86/idct_blk_mmx.c
+++ b/vp9/decoder/x86/idct_blk_mmx.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_ports/config.h"
-#include "vp9/common/idct.h"
+#include "vp9/common/blockd.h"
 #include "vp9/decoder/dequantize.h"
 
 void vp9_dequant_dc_idct_add_y_block_mmx(short *q, const short *dq,
--- a/vp9/decoder/x86/idct_blk_sse2.c
+++ b/vp9/decoder/x86/idct_blk_sse2.c
@@ -9,7 +9,7 @@
  */
 
 #include "vpx_ports/config.h"
-#include "vp9/common/idct.h"
+#include "vp9/common/blockd.h"
 #include "vp9/decoder/dequantize.h"
 
 void vp9_idct_dequant_dc_0_2x_sse2(short *q, const short *dq,
--- a/vp9/encoder/dct.c
+++ b/vp9/encoder/dct.c
@@ -12,7 +12,6 @@
 #include <assert.h>
 #include <math.h>
 #include "vpx_ports/config.h"
-#include "vp9/common/idct.h"
 #include "vp9/common/systemdependent.h"
 
 #include "vp9/common/blockd.h"
--- a/vp9/encoder/encodeframe.c
+++ b/vp9/encoder/encodeframe.c
@@ -2077,14 +2077,14 @@
 
   if (mbmi->ref_frame == INTRA_FRAME) {
     if (mbmi->mode == B_PRED) {
-      vp9_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
-      vp9_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
+      vp9_encode_intra16x16mbuv(x);
+      vp9_encode_intra4x4mby(x);
     } else if (mbmi->mode == I8X8_PRED) {
-      vp9_encode_intra8x8mby(IF_RTCD(&cpi->rtcd), x);
-      vp9_encode_intra8x8mbuv(IF_RTCD(&cpi->rtcd), x);
+      vp9_encode_intra8x8mby(x);
+      vp9_encode_intra8x8mbuv(x);
     } else {
-      vp9_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
-      vp9_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
+      vp9_encode_intra16x16mbuv(x);
+      vp9_encode_intra16x16mby(x);
     }
 
     if (output_enabled)
@@ -2124,7 +2124,7 @@
     }
 
     if (!x->skip) {
-      vp9_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
+      vp9_encode_inter16x16(x);
 
       // Clear mb_skip_coeff if mb_no_coeff_skip is not set
       if (!cpi->common.mb_no_coeff_skip)
@@ -2226,7 +2226,6 @@
   uint8_t *vdst = xd->dst.v_buffer;
   int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
   int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
-  const VP9_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd);
   int seg_ref_active;
   unsigned char ref_pred_flag;
   int n;
@@ -2343,7 +2342,7 @@
                           udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                           vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
                           dst_uv_stride);
-    vp9_fidct_mb(x, rtcd);
+    vp9_fidct_mb(x);
     vp9_recon_mby_s_c(&x->e_mbd,
                       dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
     vp9_recon_mbuv_s_c(&x->e_mbd,
--- a/vp9/encoder/encodeintra.c
+++ b/vp9/encoder/encodeintra.c
@@ -10,7 +10,6 @@
 
 #include "vpx_ports/config.h"
 #include "vp9_rtcd.h"
-#include "vp9/common/idct.h"
 #include "quantize.h"
 #include "vp9/common/reconintra.h"
 #include "vp9/common/reconintra4x4.h"
@@ -38,11 +37,11 @@
     mbmi->uv_mode = DC_PRED;
     mbmi->ref_frame = INTRA_FRAME;
 
-    vp9_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
+    vp9_encode_intra16x16mby(x);
   } else {
     for (i = 0; i < 16; i++) {
       x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED;
-      vp9_encode_intra4x4block(IF_RTCD(&cpi->rtcd), x, i);
+      vp9_encode_intra4x4block(x, i);
     }
   }
 
@@ -51,8 +50,7 @@
   return intra_pred_var;
 }
 
-void vp9_encode_intra4x4block(const VP9_ENCODER_RTCD *rtcd,
-                              MACROBLOCK *x, int ib) {
+void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
   BLOCKD *b = &x->e_mbd.block[ib];
   BLOCK *be = &x->block[ib];
   TX_TYPE tx_type;
@@ -82,21 +80,21 @@
   } else {
     x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
     x->quantize_b_4x4(be, b) ;
-    vp9_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32);
+    vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
   }
 
   vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 }
 
-void vp9_encode_intra4x4mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *mb) {
+void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
   int i;
 
   for (i = 0; i < 16; i++)
-    vp9_encode_intra4x4block(rtcd, mb, i);
+    vp9_encode_intra4x4block(mb, i);
   return;
 }
 
-void vp9_encode_intra16x16mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+void vp9_encode_intra16x16mby(MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   BLOCK *b = &x->block[0];
   TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
@@ -120,33 +118,33 @@
       vp9_fht(b->src_diff, 32, b->coeff, tx_type, 16);
       vp9_quantize_mby_16x16(x);
       if (x->optimize)
-        vp9_optimize_mby_16x16(x, rtcd);
+        vp9_optimize_mby_16x16(x);
       vp9_ihtllm_c(bd->dqcoeff, bd->diff, 32, tx_type, 16);
     } else {
       vp9_transform_mby_16x16(x);
       vp9_quantize_mby_16x16(x);
       if (x->optimize)
-        vp9_optimize_mby_16x16(x, rtcd);
-      vp9_inverse_transform_mby_16x16(IF_RTCD(&rtcd->common->idct), xd);
+        vp9_optimize_mby_16x16(x);
+      vp9_inverse_transform_mby_16x16(xd);
     }
   } else if (tx_size == TX_8X8) {
     vp9_transform_mby_8x8(x);
     vp9_quantize_mby_8x8(x);
     if (x->optimize)
-      vp9_optimize_mby_8x8(x, rtcd);
-    vp9_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd);
+      vp9_optimize_mby_8x8(x);
+    vp9_inverse_transform_mby_8x8(xd);
   } else {
     vp9_transform_mby_4x4(x);
     vp9_quantize_mby_4x4(x);
     if (x->optimize)
-      vp9_optimize_mby_4x4(x, rtcd);
-    vp9_inverse_transform_mby_4x4(IF_RTCD(&rtcd->common->idct), xd);
+      vp9_optimize_mby_4x4(x);
+    vp9_inverse_transform_mby_4x4(xd);
   }
 
   vp9_recon_mby(xd);
 }
 
-void vp9_encode_intra16x16mbuv(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+void vp9_encode_intra16x16mbuv(MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
 
@@ -167,21 +165,20 @@
     vp9_transform_mbuv_4x4(x);
     vp9_quantize_mbuv_4x4(x);
     if (x->optimize)
-      vp9_optimize_mbuv_4x4(x, rtcd);
-    vp9_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd);
+      vp9_optimize_mbuv_4x4(x);
+    vp9_inverse_transform_mbuv_4x4(xd);
   } else /* 16x16 or 8x8 */ {
     vp9_transform_mbuv_8x8(x);
     vp9_quantize_mbuv_8x8(x);
     if (x->optimize)
-      vp9_optimize_mbuv_8x8(x, rtcd);
-    vp9_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), xd);
+      vp9_optimize_mbuv_8x8(x);
+    vp9_inverse_transform_mbuv_8x8(xd);
   }
 
   vp9_recon_intra_mbuv(xd);
 }
 
-void vp9_encode_intra8x8(const VP9_ENCODER_RTCD *rtcd,
-                         MACROBLOCK *x, int ib) {
+void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
   MACROBLOCKD *xd = &x->e_mbd;
   BLOCKD *b = &xd->block[ib];
   BLOCK *be = &x->block[ib];
@@ -216,7 +213,7 @@
     } else {
       x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
-      vp9_idct_idct8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+      vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
     }
   } else {
     for (i = 0; i < 4; i++) {
@@ -225,7 +222,7 @@
       vp9_subtract_b(be, b, 16);
       x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
       x->quantize_b_4x4(be, b);
-      vp9_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 32);
+      vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
     }
   }
 
@@ -237,17 +234,16 @@
   }
 }
 
-void vp9_encode_intra8x8mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+void vp9_encode_intra8x8mby(MACROBLOCK *x) {
   int i, ib;
 
   for (i = 0; i < 4; i++) {
     ib = vp9_i8x8_block[i];
-    vp9_encode_intra8x8(rtcd, x, ib);
+    vp9_encode_intra8x8(x, ib);
   }
 }
 
-void vp9_encode_intra_uv4x4(const VP9_ENCODER_RTCD *rtcd,
-                            MACROBLOCK *x, int ib,
+void vp9_encode_intra_uv4x4(MACROBLOCK *x, int ib,
                             int mode, int second) {
   BLOCKD *b = &x->e_mbd.block[ib];
   BLOCK *be = &x->block[ib];
@@ -266,13 +262,13 @@
 
   x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
   x->quantize_b_4x4(be, b);
-  vp9_inverse_transform_b_4x4(IF_RTCD(&rtcd->common->idct), b, 16);
+  vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
 
   vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
                    b->dst_stride);
 }
 
-void vp9_encode_intra8x8mbuv(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+void vp9_encode_intra8x8mbuv(MACROBLOCK *x) {
   int i, ib, mode, second;
   BLOCKD *b;
 
@@ -286,8 +282,8 @@
     second = -1;
 #endif
     /*u */
-    vp9_encode_intra_uv4x4(rtcd, x, i + 16, mode, second);
+    vp9_encode_intra_uv4x4(x, i + 16, mode, second);
     /*v */
-    vp9_encode_intra_uv4x4(rtcd, x, i + 20, mode, second);
+    vp9_encode_intra_uv4x4(x, i + 20, mode, second);
   }
 }
--- a/vp9/encoder/encodeintra.h
+++ b/vp9/encoder/encodeintra.h
@@ -14,14 +14,12 @@
 #include "onyx_int.h"
 
 int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
-void vp9_encode_intra16x16mby(const VP9_ENCODER_RTCD *, MACROBLOCK *x);
-void vp9_encode_intra16x16mbuv(const VP9_ENCODER_RTCD *, MACROBLOCK *x);
-void vp9_encode_intra4x4mby(const VP9_ENCODER_RTCD *, MACROBLOCK *mb);
-void vp9_encode_intra4x4block(const VP9_ENCODER_RTCD *rtcd,
-                              MACROBLOCK *x, int ib);
-void vp9_encode_intra8x8mby(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x);
-void vp9_encode_intra8x8mbuv(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x);
-void vp9_encode_intra8x8(const VP9_ENCODER_RTCD *rtcd,
-                         MACROBLOCK *x, int ib);
+void vp9_encode_intra16x16mby(MACROBLOCK *x);
+void vp9_encode_intra16x16mbuv(MACROBLOCK *x);
+void vp9_encode_intra4x4mby(MACROBLOCK *mb);
+void vp9_encode_intra4x4block(MACROBLOCK *x, int ib);
+void vp9_encode_intra8x8mby(MACROBLOCK *x);
+void vp9_encode_intra8x8mbuv(MACROBLOCK *x);
+void vp9_encode_intra8x8(MACROBLOCK *x, int ib);
 
 #endif  // __ENCODEINTRA_H_
--- a/vp9/encoder/encodemb.c
+++ b/vp9/encoder/encodemb.c
@@ -20,12 +20,6 @@
 #include "vp9/common/systemdependent.h"
 #include "vp9_rtcd.h"
 
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#else
-#define IF_RTCD(x) NULL
-#endif
-
 void vp9_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch) {
   unsigned char *src_ptr = (*(be->base_src) + be->src);
   short *diff_ptr = be->src_diff;
@@ -119,7 +113,7 @@
   vp9_subtract_mby_s_c(diff, src, stride, pred, 16);
 }
 
-static void subtract_mb(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+static void subtract_mb(MACROBLOCK *x) {
   BLOCK *b = &x->block[0];
 
   vp9_subtract_mby(x->src_diff, *(b->base_src), x->e_mbd.predictor,
@@ -265,7 +259,7 @@
 
 static void optimize_b(MACROBLOCK *mb, int i, PLANE_TYPE type,
                        ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
-                       const VP9_ENCODER_RTCD *rtcd, int tx_size) {
+                       int tx_size) {
   BLOCK *b;
   BLOCKD *d;
   vp9_token_state tokens[65][2];
@@ -567,7 +561,7 @@
   }
 }
 
-void vp9_optimize_mby_4x4(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
+void vp9_optimize_mby_4x4(MACROBLOCK *x) {
   int b;
   PLANE_TYPE type;
   int has_2nd_order;
@@ -590,19 +584,19 @@
 
   for (b = 0; b < 16; b++) {
     optimize_b(x, b, type,
-               ta + vp9_block2above[b], tl + vp9_block2left[b], rtcd, TX_4X4);
+               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
   }
 
   if (has_2nd_order) {
     b = 24;
     optimize_b(x, b, PLANE_TYPE_Y2,
-               ta + vp9_block2above[b], tl + vp9_block2left[b], rtcd, TX_4X4);
+               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
     check_reset_2nd_coeffs(&x->e_mbd,
                            ta + vp9_block2above[b], tl + vp9_block2left[b]);
   }
 }
 
-void vp9_optimize_mbuv_4x4(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
+void vp9_optimize_mbuv_4x4(MACROBLOCK *x) {
   int b;
   ENTROPY_CONTEXT_PLANES t_above, t_left;
   ENTROPY_CONTEXT *ta;
@@ -619,16 +613,16 @@
 
   for (b = 16; b < 24; b++) {
     optimize_b(x, b, PLANE_TYPE_UV,
-               ta + vp9_block2above[b], tl + vp9_block2left[b], rtcd, TX_4X4);
+               ta + vp9_block2above[b], tl + vp9_block2left[b], TX_4X4);
   }
 }
 
-static void optimize_mb_4x4(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
-  vp9_optimize_mby_4x4(x, rtcd);
-  vp9_optimize_mbuv_4x4(x, rtcd);
+static void optimize_mb_4x4(MACROBLOCK *x) {
+  vp9_optimize_mby_4x4(x);
+  vp9_optimize_mbuv_4x4(x);
 }
 
-void vp9_optimize_mby_8x8(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
+void vp9_optimize_mby_8x8(MACROBLOCK *x) {
   int b;
   PLANE_TYPE type;
   ENTROPY_CONTEXT_PLANES t_above, t_left;
@@ -648,7 +642,7 @@
   for (b = 0; b < 16; b += 4) {
     optimize_b(x, b, type,
                ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b],
-               rtcd, TX_8X8);
+               TX_8X8);
     ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]];
     tl[vp9_block2left_8x8[b] + 1]  = tl[vp9_block2left_8x8[b]];
   }
@@ -661,7 +655,7 @@
   }
 }
 
-void vp9_optimize_mbuv_8x8(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
+void vp9_optimize_mbuv_8x8(MACROBLOCK *x) {
   int b;
   ENTROPY_CONTEXT_PLANES t_above, t_left;
   ENTROPY_CONTEXT *ta;
@@ -679,20 +673,19 @@
   for (b = 16; b < 24; b += 4) {
     optimize_b(x, b, PLANE_TYPE_UV,
                ta + vp9_block2above_8x8[b], tl + vp9_block2left_8x8[b],
-               rtcd, TX_8X8);
+               TX_8X8);
     ta[vp9_block2above_8x8[b] + 1] = ta[vp9_block2above_8x8[b]];
     tl[vp9_block2left_8x8[b] + 1]  = tl[vp9_block2left_8x8[b]];
   }
 }
 
-static void optimize_mb_8x8(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
-  vp9_optimize_mby_8x8(x, rtcd);
-  vp9_optimize_mbuv_8x8(x, rtcd);
+static void optimize_mb_8x8(MACROBLOCK *x) {
+  vp9_optimize_mby_8x8(x);
+  vp9_optimize_mbuv_8x8(x);
 }
 
 static void optimize_b_16x16(MACROBLOCK *mb, int i, PLANE_TYPE type,
-                             ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
-                             const VP9_ENCODER_RTCD *rtcd) {
+                             ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
   BLOCK *b = &mb->block[i];
   BLOCKD *d = &mb->e_mbd.block[i];
   vp9_token_state tokens[257][2];
@@ -864,7 +857,7 @@
   *a = *l = (d->eob != !type);
 }
 
-void vp9_optimize_mby_16x16(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
+void vp9_optimize_mby_16x16(MACROBLOCK *x) {
   ENTROPY_CONTEXT_PLANES t_above, t_left;
   ENTROPY_CONTEXT *ta, *tl;
 
@@ -876,15 +869,15 @@
 
   ta = (ENTROPY_CONTEXT *)&t_above;
   tl = (ENTROPY_CONTEXT *)&t_left;
-  optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl, rtcd);
+  optimize_b_16x16(x, 0, PLANE_TYPE_Y_WITH_DC, ta, tl);
 }
 
-static void optimize_mb_16x16(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
-  vp9_optimize_mby_16x16(x, rtcd);
-  vp9_optimize_mbuv_8x8(x, rtcd);
+static void optimize_mb_16x16(MACROBLOCK *x) {
+  vp9_optimize_mby_16x16(x);
+  vp9_optimize_mbuv_8x8(x);
 }
 
-void vp9_fidct_mb(MACROBLOCK *x, const VP9_ENCODER_RTCD *rtcd) {
+void vp9_fidct_mb(MACROBLOCK *x) {
   MACROBLOCKD *const xd = &x->e_mbd;
   TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
 
@@ -892,8 +885,8 @@
     vp9_transform_mb_16x16(x);
     vp9_quantize_mb_16x16(x);
     if (x->optimize)
-      optimize_mb_16x16(x, rtcd);
-    vp9_inverse_transform_mb_16x16(IF_RTCD(&rtcd->common->idct), xd);
+      optimize_mb_16x16(x);
+    vp9_inverse_transform_mb_16x16(xd);
   } else if (tx_size == TX_8X8) {
     if (xd->mode_info_context->mbmi.mode == SPLITMV) {
       assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
@@ -902,38 +895,38 @@
       vp9_quantize_mby_8x8(x);
       vp9_quantize_mbuv_4x4(x);
       if (x->optimize) {
-        vp9_optimize_mby_8x8(x, rtcd);
-        vp9_optimize_mbuv_4x4(x, rtcd);
+        vp9_optimize_mby_8x8(x);
+        vp9_optimize_mbuv_4x4(x);
       }
-      vp9_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), xd);
-      vp9_inverse_transform_mbuv_4x4(IF_RTCD(&rtcd->common->idct), xd);
+      vp9_inverse_transform_mby_8x8(xd);
+      vp9_inverse_transform_mbuv_4x4(xd);
     } else {
       vp9_transform_mb_8x8(x);
       vp9_quantize_mb_8x8(x);
       if (x->optimize)
-        optimize_mb_8x8(x, rtcd);
-      vp9_inverse_transform_mb_8x8(IF_RTCD(&rtcd->common->idct), xd);
+        optimize_mb_8x8(x);
+      vp9_inverse_transform_mb_8x8(xd);
     }
   } else {
     transform_mb_4x4(x);
     vp9_quantize_mb_4x4(x);
     if (x->optimize)
-      optimize_mb_4x4(x, rtcd);
-    vp9_inverse_transform_mb_4x4(IF_RTCD(&rtcd->common->idct), xd);
+      optimize_mb_4x4(x);
+    vp9_inverse_transform_mb_4x4(xd);
   }
 }
 
-void vp9_encode_inter16x16(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+void vp9_encode_inter16x16(MACROBLOCK *x) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
   vp9_build_inter_predictors_mb(xd);
-  subtract_mb(rtcd, x);
-  vp9_fidct_mb(x, rtcd);
+  subtract_mb(x);
+  vp9_fidct_mb(x);
   vp9_recon_mb(xd);
 }
 
 /* this function is used by first pass only */
-void vp9_encode_inter16x16y(const VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
+void vp9_encode_inter16x16y(MACROBLOCK *x) {
   MACROBLOCKD *xd = &x->e_mbd;
   BLOCK *b = &x->block[0];
 
@@ -948,7 +941,7 @@
 
   vp9_transform_mby_4x4(x);
   vp9_quantize_mby_4x4(x);
-  vp9_inverse_transform_mby_4x4(IF_RTCD(&rtcd->common->idct), xd);
+  vp9_inverse_transform_mby_4x4(xd);
 
   vp9_recon_mby(xd);
 }
--- a/vp9/encoder/encodemb.h
+++ b/vp9/encoder/encodemb.h
@@ -35,27 +35,27 @@
 
 #include "onyx_int.h"
 struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(const struct VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+void vp9_encode_inter16x16(MACROBLOCK *x);
 
 void vp9_transform_mbuv_4x4(MACROBLOCK *x);
 void vp9_transform_mby_4x4(MACROBLOCK *x);
 
-void vp9_optimize_mby_4x4(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd);
-void vp9_optimize_mbuv_4x4(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd);
-void vp9_encode_inter16x16y(const struct VP9_ENCODER_RTCD *rtcd, MACROBLOCK *x);
+void vp9_optimize_mby_4x4(MACROBLOCK *x);
+void vp9_optimize_mbuv_4x4(MACROBLOCK *x);
+void vp9_encode_inter16x16y(MACROBLOCK *x);
 
 void vp9_transform_mb_8x8(MACROBLOCK *mb);
 void vp9_transform_mby_8x8(MACROBLOCK *x);
 void vp9_transform_mbuv_8x8(MACROBLOCK *x);
 void vp9_build_dcblock_8x8(MACROBLOCK *b);
-void vp9_optimize_mby_8x8(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd);
-void vp9_optimize_mbuv_8x8(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd);
+void vp9_optimize_mby_8x8(MACROBLOCK *x);
+void vp9_optimize_mbuv_8x8(MACROBLOCK *x);
 
 void vp9_transform_mb_16x16(MACROBLOCK *mb);
 void vp9_transform_mby_16x16(MACROBLOCK *x);
-void vp9_optimize_mby_16x16(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd);
+void vp9_optimize_mby_16x16(MACROBLOCK *x);
 
-void vp9_fidct_mb(MACROBLOCK *x, const struct VP9_ENCODER_RTCD *rtcd);
+void vp9_fidct_mb(MACROBLOCK *x);
 
 void vp9_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch);
 
--- a/vp9/encoder/firstpass.c
+++ b/vp9/encoder/firstpass.c
@@ -621,7 +621,7 @@
           this_error = motion_error;
           vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
           xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-          vp9_encode_inter16x16y(IF_RTCD(&cpi->rtcd), x);
+          vp9_encode_inter16x16y(x);
           sum_mvr += mv.as_mv.row;
           sum_mvr_abs += abs(mv.as_mv.row);
           sum_mvc += mv.as_mv.col;
--- a/vp9/encoder/onyx_if.c
+++ b/vp9/encoder/onyx_if.c
@@ -23,6 +23,7 @@
 #include "ratectrl.h"
 #include "vp9/common/quant_common.h"
 #include "segmentation.h"
+#include "./vp9_rtcd.h"
 #if CONFIG_POSTPROC
 #include "vp9/common/postproc.h"
 #endif
@@ -1267,8 +1268,6 @@
   }
 #endif
 
-
-
   cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;
   cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
   cpi->mb.quantize_b_8x8      = vp9_regular_quantize_b_8x8;
@@ -1584,14 +1583,18 @@
   cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
   cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
 
+  cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
+  cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_idct4x4llm;
+  cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
+  cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+
 #if CONFIG_LOSSLESS
   cpi->oxcf.lossless = oxcf->lossless;
   if (cpi->oxcf.lossless) {
-    cpi->common.rtcd.idct.idct1        = vp9_short_inv_walsh4x4_1_x8_c;
-    cpi->common.rtcd.idct.idct16       = vp9_short_inv_walsh4x4_x8_c;
-    cpi->common.rtcd.idct.idct1_scalar_add  = vp9_dc_only_inv_walsh_add_c;
-    cpi->common.rtcd.idct.iwalsh1      = vp9_short_inv_walsh4x4_1_c;
-    cpi->common.rtcd.idct.iwalsh16     = vp9_short_inv_walsh4x4_lossless_c;
+    cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
+    cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
+    cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
+    cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
   }
 #endif
 
--- a/vp9/encoder/rdopt.c
+++ b/vp9/encoder/rdopt.c
@@ -28,7 +28,6 @@
 #include "vp9/common/quant_common.h"
 #include "encodemb.h"
 #include "quantize.h"
-#include "vp9/common/idct.h"
 #include "variance.h"
 #include "mcomp.h"
 #include "rdopt.h"
@@ -680,7 +679,6 @@
 static void macro_block_yrd_4x4(MACROBLOCK *mb,
                                 int *Rate,
                                 int *Distortion,
-                                const VP9_ENCODER_RTCD *rtcd,
                                 int *skippable, int backup) {
   int b;
   MACROBLOCKD *const xd = &mb->e_mbd;
@@ -751,7 +749,6 @@
 static void macro_block_yrd_8x8(MACROBLOCK *mb,
                                 int *Rate,
                                 int *Distortion,
-                                const VP9_ENCODER_RTCD *rtcd,
                                 int *skippable, int backup) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   BLOCK   *const mb_y2 = mb->block + 24;
@@ -802,8 +799,7 @@
 }
 
 static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
-                                  const VP9_ENCODER_RTCD *rtcd, int *skippable,
-                                  int backup) {
+                                  int *skippable, int backup) {
   int d;
   MACROBLOCKD *xd = &mb->e_mbd;
   BLOCKD *b  = &mb->e_mbd.block[0];
@@ -821,7 +817,7 @@
   //                trailing coefficients to be zero, instead of running trellis
   //                optimization in the rate-distortion optimization loop?
   if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
-    vp9_optimize_mby_16x16(mb, rtcd);
+    vp9_optimize_mby_16x16(mb);
 
   d = vp9_mbblock_error(mb, 0);
 
@@ -902,7 +898,6 @@
 static void macro_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                             int *distortion, int *skippable,
                             int64_t txfm_cache[NB_TXFM_MODES]) {
-  VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
   int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX];
 
@@ -910,11 +905,9 @@
                    x->block[0].src_stride);
 
   macro_block_yrd_16x16(x, &r[0][TX_16X16], &d[TX_16X16],
-                        IF_RTCD(&cpi->rtcd), &s[TX_16X16], 1);
-  macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8],
-                      IF_RTCD(&cpi->rtcd), &s[TX_8X8], 1);
-  macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4],
-                      IF_RTCD(&cpi->rtcd), &s[TX_4X4], 1);
+                        &s[TX_16X16], 1);
+  macro_block_yrd_8x8(x, &r[0][TX_8X8], &d[TX_8X8], &s[TX_8X8], 1);
+  macro_block_yrd_4x4(x, &r[0][TX_4X4], &d[TX_4X4], &s[TX_4X4], 1);
 
   choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skippable,
                            txfm_cache);
@@ -932,7 +925,7 @@
 #if CONFIG_SUPERBLOCKS
 static void super_block_yrd(VP9_COMP *cpi,
                             MACROBLOCK *x, int *rate, int *distortion,
-                            const VP9_ENCODER_RTCD *rtcd, int *skip,
+                            int *skip,
                             int64_t txfm_cache[NB_TXFM_MODES]) {
   MACROBLOCKD *const xd = &x->e_mbd;
   int r[2][TX_SIZE_MAX], d[TX_SIZE_MAX], s[TX_SIZE_MAX], n;
@@ -961,7 +954,7 @@
 
     xd->above_context = &t_above[TX_16X16][x_idx];
     xd->left_context = &t_left[TX_16X16][y_idx];
-    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, IF_RTCD(&cpi->rtcd), &s_tmp, 0);
+    macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0);
     d[TX_16X16] += d_tmp;
     r[0][TX_16X16] += r_tmp;
     s[TX_16X16] = s[TX_16X16] && s_tmp;
@@ -968,7 +961,7 @@
 
     xd->above_context = &t_above[TX_4X4][x_idx];
     xd->left_context = &t_left[TX_4X4][y_idx];
-    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, IF_RTCD(&cpi->rtcd), &s_tmp, 0);
+    macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0);
     d[TX_4X4] += d_tmp;
     r[0][TX_4X4] += r_tmp;
     s[TX_4X4] = s[TX_4X4] && s_tmp;
@@ -975,7 +968,7 @@
 
     xd->above_context = &t_above[TX_8X8][x_idx];
     xd->left_context = &t_left[TX_8X8][y_idx];
-    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, IF_RTCD(&cpi->rtcd), &s_tmp, 0);
+    macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0);
     d[TX_8X8] += d_tmp;
     r[0][TX_8X8] += r_tmp;
     s[TX_8X8] = s[TX_8X8] && s_tmp;
@@ -1144,8 +1137,7 @@
   if (best_tx_type != DCT_DCT)
     vp9_ihtllm_c(best_dqcoeff, b->diff, 32, best_tx_type, 4);
   else
-    IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(
-        best_dqcoeff, b->diff, 32);
+    xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
 
   vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
@@ -1261,7 +1253,7 @@
     vp9_build_intra_predictors_sby_s(&x->e_mbd);
 
     super_block_yrd(cpi, x, &this_rate_tokenonly,
-                    &this_distortion, IF_RTCD(&cpi->rtcd), &s, txfm_cache);
+                    &this_distortion, &s, txfm_cache);
     this_rate = this_rate_tokenonly +
                 x->mbmode_cost[x->e_mbd.frame_type]
                               [x->e_mbd.mode_info_context->mbmi.mode];
@@ -1509,7 +1501,7 @@
 #if CONFIG_COMP_INTRA_PRED
   b->bmi.as_mode.second = (*best_second_mode);
 #endif
-  vp9_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib);
+  vp9_encode_intra8x8(x, ib);
 
   if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
     a[vp9_block2above_8x8[idx]]     = besta0;
@@ -1846,7 +1838,6 @@
 static void super_block_uvrd_8x8(MACROBLOCK *x,
                                  int *rate,
                                  int *distortion,
-                                 const VP9_ENCODER_RTCD *rtcd,
                                  int *skippable) {
   MACROBLOCKD *const xd = &x->e_mbd;
   int d = 0, r = 0, n, s = 1;
@@ -1909,7 +1900,7 @@
     vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
 
     super_block_uvrd_8x8(x, &this_rate_tokenonly,
-                         &this_distortion, IF_RTCD(&cpi->rtcd), &s);
+                         &this_distortion, &s);
     this_rate = this_rate_tokenonly +
                 x->intra_uv_mode_cost[x->e_mbd.frame_type][mode];
     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@@ -2072,8 +2063,7 @@
                                        int *labelyrate,
                                        int *distortion,
                                        ENTROPY_CONTEXT *ta,
-                                       ENTROPY_CONTEXT *tl,
-                                       const VP9_ENCODER_RTCD *rtcd) {
+                                       ENTROPY_CONTEXT *tl) {
   int i;
   MACROBLOCKD *xd = &x->e_mbd;
 
@@ -2109,8 +2099,7 @@
                                            int *distortion,
                                            int64_t *otherrd,
                                            ENTROPY_CONTEXT *ta,
-                                           ENTROPY_CONTEXT *tl,
-                                           const VP9_ENCODER_RTCD *rtcd) {
+                                           ENTROPY_CONTEXT *tl) {
   int i, j;
   MACROBLOCKD *xd = &x->e_mbd;
   const int iblock[4] = { 0, 1, 4, 5 };
@@ -2431,13 +2420,12 @@
 
       if (segmentation == PARTITIONING_4X4) {
         this_rd = encode_inter_mb_segment(x, labels, i, &labelyrate,
-                                          &distortion,
-                                          ta_s, tl_s, IF_RTCD(&cpi->rtcd));
+                                          &distortion, ta_s, tl_s);
         other_rd = this_rd;
       } else {
         this_rd = encode_inter_mb_segment_8x8(x, labels, i, &labelyrate,
                                               &distortion, &other_rd,
-                                              ta_s, tl_s, IF_RTCD(&cpi->rtcd));
+                                              ta_s, tl_s);
       }
       this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
       rate += labelyrate;
@@ -3532,7 +3520,7 @@
 
       // Y cost and distortion
       super_block_yrd(cpi, x, rate_y, distortion_y,
-                      IF_RTCD(&cpi->rtcd), &skippable_y, txfm_cache);
+                      &skippable_y, txfm_cache);
       *rate2 += *rate_y;
       *distortion += *distortion_y;
 
@@ -4713,7 +4701,7 @@
     if (ref_frame == INTRA_FRAME) {
       vp9_build_intra_predictors_sby_s(xd);
       super_block_yrd(cpi, x, &rate_y, &distortion_y,
-                      IF_RTCD(&cpi->rtcd), &skippable, txfm_cache);
+                      &skippable, txfm_cache);
       if (mbmi->txfm_size == TX_4X4) {
         rate_uv = rate_uv_4x4;
         distortion_uv = dist_uv_4x4;