shithub: opus

Download patch

ref: 71fb707875b95672f0cd1cb153c890eff4219720
parent: affb551e47052d5b9a0e37c681c816a6cf4159a7
author: Timothy B. Terriberry <territim@amazon.com>
date: Wed Jul 6 11:21:16 EDT 2022

Don't compile x86 cpu detection without RTCD.

Also #error if RTCD is enabled without a detection method, as is done for Arm.
A number of SILK functions also still used the lookup tables, even
when RTCD was disabled.
Fix those, too.

--- a/celt/cpu_support.h
+++ b/celt/cpu_support.h
@@ -43,10 +43,11 @@
  */
 #define OPUS_ARCHMASK 3
 
-#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+#elif defined(OPUS_HAVE_RTCD) && \
+  ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
-  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
 
 #include "x86/x86cpu.h"
 /* We currently support 5 x86 variants:
--- a/celt/x86/x86cpu.c
+++ b/celt/x86/x86cpu.c
@@ -35,12 +35,12 @@
 #include "pitch.h"
 #include "x86cpu.h"
 
-#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
+#if defined(OPUS_HAVE_RTCD) && \
+  ((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
   (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
-  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
+  (defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
 
-
 #if defined(_MSC_VER)
 
 #include <intrin.h>
@@ -91,6 +91,9 @@
             what we want on CPUs that don't support CPUID. */
         CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0;
     }
+#else
+# error "Configured to use x86 RTCD, but no CPU detection method available. " \
+ "Reconfigure with --disable-rtcd (or send patches)."
 #endif
 }
 
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -609,10 +609,12 @@
 /* the following seems faster on x86 */
 #define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
 
-#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#if !defined(OVERRIDE_silk_burg_modified)
 #define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
     ((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
+#endif
 
+#if !defined(OVERRIDE_silk_inner_prod16)
 #define silk_inner_prod16(inVec1, inVec2, len, arch) \
     ((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
 #endif
--- a/silk/x86/SigProc_FIX_sse.h
+++ b/silk/x86/SigProc_FIX_sse.h
@@ -46,10 +46,12 @@
 );
 
 #  if defined(OPUS_X86_PRESUME_SSE4_1)
+
+#   define OVERRIDE_silk_burg_modified
 #   define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
        ((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
 
-#  else
+#  elif defined(OPUS_HAVE_RTCD)
 
 extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
     opus_int32                  *res_nrg,           /* O    Residual energy                                             */
@@ -62,6 +64,7 @@
     const opus_int              D,                  /* I    Order                                                       */
     int                         arch                /* I    Run-time architecture                                       */);
 
+#   define OVERRIDE_silk_burg_modified
 #   define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
      ((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
 
@@ -76,10 +79,11 @@
 
 #  if defined(OPUS_X86_PRESUME_SSE4_1)
 
+#   define OVERRIDE_silk_inner_prod16
 #   define silk_inner_prod16(inVec1, inVec2, len, arch) \
        ((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len))
 
-#  else
+#  elif defined(OPUS_HAVE_RTCD)
 
 extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])(
                     const opus_int16 *inVec1,
@@ -86,6 +90,7 @@
                     const opus_int16 *inVec2,
                     const opus_int   len);
 
+#   define OVERRIDE_silk_inner_prod16
 #   define silk_inner_prod16(inVec1, inVec2, len, arch) \
      ((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))
 
--- a/silk/x86/main_sse.h
+++ b/silk/x86/main_sse.h
@@ -34,8 +34,6 @@
 
 # if defined(OPUS_X86_MAY_HAVE_SSE4_1)
 
-#  define OVERRIDE_silk_VQ_WMat_EC
-
 void silk_VQ_WMat_EC_sse4_1(
     opus_int8                   *ind,                           /* O    index of best codebook vector               */
     opus_int32                  *res_nrg_Q15,                   /* O    best residual energy                        */
@@ -53,12 +51,13 @@
 
 #  if defined OPUS_X86_PRESUME_SSE4_1
 
+#   define OVERRIDE_silk_VQ_WMat_EC
 #   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                            subfr_len, max_gain_Q7, L, arch) \
     ((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                           subfr_len, max_gain_Q7, L))
 
-#  else
+#  elif defined(OPUS_HAVE_RTCD)
 
 extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
     opus_int8                   *ind,                           /* O    index of best codebook vector               */
@@ -75,6 +74,7 @@
     const opus_int              L                               /* I    number of vectors in codebook               */
 );
 
+#   define OVERRIDE_silk_VQ_WMat_EC
 #   define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
                            subfr_len, max_gain_Q7, L, arch) \
     ((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
@@ -82,8 +82,6 @@
 
 #  endif
 
-#  define OVERRIDE_silk_NSQ
-
 void silk_NSQ_sse4_1(
     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
@@ -104,12 +102,13 @@
 
 #  if defined OPUS_X86_PRESUME_SSE4_1
 
+#   define OVERRIDE_silk_NSQ
 #   define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
                     HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
     ((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
                    HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
 
-#  else
+#  elif defined(OPUS_HAVE_RTCD)
 
 extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
@@ -129,6 +128,7 @@
     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
 );
 
+#   define OVERRIDE_silk_NSQ
 #   define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
                     HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
     ((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
@@ -136,8 +136,6 @@
 
 #  endif
 
-#  define OVERRIDE_silk_NSQ_del_dec
-
 void silk_NSQ_del_dec_sse4_1(
     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
     silk_nsq_state              *NSQ,                                         /* I/O  NSQ state                       */
@@ -158,12 +156,13 @@
 
 #  if defined OPUS_X86_PRESUME_SSE4_1
 
+#   define OVERRIDE_silk_NSQ_del_dec
 #   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                             HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
     ((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                            HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
 
-#  else
+#  elif defined(OPUS_HAVE_RTCD)
 
 extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
     const silk_encoder_state    *psEncC,                                      /* I    Encoder State                   */
@@ -183,6 +182,7 @@
     const opus_int              LTP_scale_Q14                                 /* I    LTP state scaling               */
 );
 
+#   define OVERRIDE_silk_NSQ_del_dec
 #   define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
                             HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
     ((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
@@ -221,8 +221,6 @@
     silk_VAD_state              *psSilk_VAD         /* I/O  Pointer to Silk VAD state                   */
 );
 
-#  define OVERRIDE_silk_VAD_GetSA_Q8
-
 opus_int silk_VAD_GetSA_Q8_sse4_1(
     silk_encoder_state *psEnC,
     const opus_int16   pIn[]
@@ -229,16 +227,19 @@
 );
 
 #  if defined(OPUS_X86_PRESUME_SSE4_1)
+
+#   define OVERRIDE_silk_VAD_GetSA_Q8
 #   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
 
-#  else
+#  elif defined(OPUS_HAVE_RTCD)
 
-#   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
-      ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
-
 extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
      silk_encoder_state *psEnC,
      const opus_int16   pIn[]);
+
+#   define OVERRIDE_silk_VAD_GetSA_Q8
+#   define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
+      ((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
 
 #  endif
 
--- a/silk/x86/x86_silk_map.c
+++ b/silk/x86/x86_silk_map.c
@@ -35,7 +35,7 @@
 #include "pitch.h"
 #include "main.h"
 
-#if !defined(OPUS_X86_PRESUME_SSE4_1)
+#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1)
 
 #if defined(FIXED_POINT)