shithub: pt2-clone

--- a/src/pt2_audio.c

+++ b/src/pt2_audio.c

@@ -15,11 +15,11 @@

 #else

 #include <unistd.h>

 #endif

-#include <math.h> // sqrt(),tan()

 #include <fcntl.h>

 #include <sys/types.h>

 #include <sys/stat.h>

 #include <limits.h>

+#include "pt2_math.h"

 #include "pt2_audio.h"

 #include "pt2_header.h"

 #include "pt2_helpers.h"

@@ -37,22 +37,29 @@

 #include "pt2_ledfilter.h"

 #include "pt2_downsamplers2x.h"

+#define STEREO_NORM_FACTOR 0.5 /* cumulative mid/side normalization factor (1/sqrt(2))*(1/sqrt(2)) */

 #define INITIAL_DITHER_SEED 0x12345000

 static volatile bool ledFilterEnabled;

 static volatile uint8_t filterModel;

-static int8_t defStereoSep;

 static bool amigaPanFlag;

-static int32_t randSeed = INITIAL_DITHER_SEED;

+static int32_t randSeed = INITIAL_DITHER_SEED, stereoSeparation = 100;

 static uint32_t audLatencyPerfValInt, audLatencyPerfValFrac;

 static uint64_t tickTime64, tickTime64Frac;

 static double *dMixBufferL, *dMixBufferR, *dMixBufferLUnaligned, *dMixBufferRUnaligned;

-static double dPrngStateL, dPrngStateR, dLState[2], dRState[2];

+static double dPrngStateL, dPrngStateR, dSideFactor;

 static blep_t blep[AMIGA_VOICES], blepVol[AMIGA_VOICES];

-static rcFilter_t filterLoA500, filterHiA500, filterHiA1200;

+static rcFilter_t filterLoA500, filterHiA500, filterLoA1200, filterHiA1200;

 static ledFilter_t filterLED;

 static SDL_AudioDeviceID dev;

+static void processFiltersA1200_NoLED(int32_t numSamples);

+static void processFiltersA1200_LED(int32_t numSamples);

+static void processFiltersA500_NoLED(int32_t numSamples);

+static void processFiltersA500_LED(int32_t numSamples);

+static void (*processFiltersFunc)(int32_t);

 // for audio/video syncing

 static uint32_t tickTimeLen, tickTimeLenFrac;

@@ -62,6 +69,24 @@

 bool intMusic(void); // defined in pt_modplayer.c

+static void updateFilterFunc(void)

+{

+	if (filterModel == FILTERMODEL_A500)

+	{

+		if (ledFilterEnabled)

+			processFiltersFunc = processFiltersA500_LED;

+		else

+			processFiltersFunc = processFiltersA500_NoLED;

+	}

+	else // A1200

+	{

+		if (ledFilterEnabled)

+			processFiltersFunc = processFiltersA1200_LED;

+		else

+			processFiltersFunc = processFiltersA1200_NoLED;

+	}

+}

 void setLEDFilter(bool state, bool doLockAudio)

 	const bool audioWasntLocked = !audio.locked;

@@ -72,6 +97,7 @@

 	editor.useLEDFilter = state;

 	ledFilterEnabled = editor.useLEDFilter;

+	updateFilterFunc();

 	if (doLockAudio && audioWasntLocked)

 		unlockAudio();

@@ -87,6 +113,7 @@

 	editor.useLEDFilter ^= 1;

 	ledFilterEnabled = editor.useLEDFilter;

+	updateFilterFunc();

 	if (audioWasntLocked)

 		unlockAudio();

@@ -160,10 +187,6 @@

 	if (audioWasntLocked)

 		lockAudio();

-	// copy old pans

-	const double dOldPanL = paula[ch].dPanL;

-	const double dOldPanR = paula[ch].dPanR;

 	memset(&paula[ch], 0, sizeof (paulaVoice_t));

 	memset(&blep[ch], 0, sizeof (blep_t));

 	memset(&blepVol[ch], 0, sizeof (blep_t));

@@ -171,10 +194,6 @@

 	stopScope(ch); // it should be safe to clear the scope now

 	memset(&scope[ch], 0, sizeof (scope_t));

-	// restore old pans

-	paula[ch].dPanL = dOldPanL;

-	paula[ch].dPanR = dOldPanR;

 	if (audioWasntLocked)

 		unlockAudio();

@@ -189,6 +208,7 @@

 		mixerKillVoice(i);

 	clearRCFilterState(&filterLoA500);

+	clearRCFilterState(&filterLoA1200);

 	clearRCFilterState(&filterHiA500);

 	clearRCFilterState(&filterHiA1200);

 	clearLEDFilterState(&filterLED);

@@ -242,15 +262,13 @@

 		// this period is not cached, calculate mixer deltas

-		// during PAT2SMP or doing MOD2WAV, use different audio output rates

+		// during PAT2SMP, use different audio output rates

 		if (editor.isSMPRendering)

 			dPeriodToDeltaDiv = editor.pat2SmpHQ ? (PAULA_PAL_CLK / PAT2SMP_HI_FREQ) : (PAULA_PAL_CLK / PAT2SMP_LO_FREQ);

-		else if (editor.isWAVRendering)

-			dPeriodToDeltaDiv = PAULA_PAL_CLK / (double)MOD2WAV_FREQ;

 		else

 			dPeriodToDeltaDiv = audio.dPeriodToDeltaDiv;

-		v->dOldVoiceDelta = dPeriodToDeltaDiv / realPeriod;

+		v->dOldVoiceDelta = (dPeriodToDeltaDiv / realPeriod) * 0.5; // /2 since we do 2x oversampling

 		// for BLEP synthesis (prevents division in inner mix loop)

 		v->dOldVoiceDeltaMul = 1.0 / v->dOldVoiceDelta;

@@ -386,11 +404,14 @@

 		lockAudio();

 	clearRCFilterState(&filterLoA500);

+	clearRCFilterState(&filterLoA1200);

 	clearRCFilterState(&filterHiA500);

 	clearRCFilterState(&filterHiA1200);

 	clearLEDFilterState(&filterLED);

 	filterModel ^= 1;

+	updateFilterFunc();

 	if (filterModel == FILTERMODEL_A500)

 		displayMsg("AUDIO: AMIGA 500");

 	else

@@ -402,6 +423,7 @@

 void mixChannels(int32_t numSamples)

+	double *dMixBufSelect[AMIGA_VOICES] = { dMixBufferL, dMixBufferR, dMixBufferR, dMixBufferL };

 	double dSmp, dVol;

 	blep_t *bSmp, *bVol;

 	paulaVoice_t *v;

@@ -418,6 +440,7 @@

 		if (!v->active || v->data == NULL)

 			continue;

+		double *dMixBuf = dMixBufSelect[i];

 		for (int32_t j = 0; j < numSamples; j++)

 			assert(v->data != NULL);

@@ -444,11 +467,8 @@

 			if (bSmp->samplesLeft > 0) dSmp = blepRun(bSmp, dSmp);

 			if (bVol->samplesLeft > 0) dVol = blepRun(bVol, dVol);

-			dSmp *= dVol;

+			dMixBuf[j] += dSmp * dVol;

-			dMixBufferL[j] += dSmp * v->dPanL;

-			dMixBufferR[j] += dSmp * v->dPanR;

 			v->dPhase += v->dDelta;

 			if (v->dPhase >= 1.0) // deltas can't be >= 1.0, so this is safe

@@ -478,12 +498,6 @@

 	dPrngStateR = 0.0;

-void resetAudioDownsamplingStates(void)

-{

-	dLState[0] = dLState[1] = 0.0;

-	dRState[0] = dRState[1] = 0.0;

-}

 static inline int32_t random32(void)

 	// LCG random 32-bit generator (quite good and fast)

@@ -492,94 +506,195 @@

 	return randSeed;

-static void processMixedSamples(int32_t i, int16_t *out)

+static void processFiltersA1200_NoLED(int32_t numSamples)

-	int32_t smp32;

-	double dPrng, dOut[2], dMixL[2], dMixR[2];

+	// apply filters

+	for (int32_t i = 0; i < numSamples; i++)

+	{

+		double dOut[2];

-	// we run the filters at 2x the audio output rate for more precision

-	for (int32_t j = 0; j < 2; j++)

+		dOut[0] = dMixBufferL[i];

+		dOut[1] = dMixBufferR[i];

+		// low-pass filter

+		RCLowPassFilterStereo(&filterLoA1200, dOut, dOut);

+		// high-pass RC filter

+		RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);

+		dMixBufferL[i] = dOut[0];

+		dMixBufferR[i] = dOut[1];

+	}

+}

+static void processFiltersA1200_LED(int32_t numSamples)

+{

+	// apply filters

+	for (int32_t i = 0; i < numSamples; i++)

-		// zero-padding (yes, this makes sense)

-		dOut[0] = (j == 0) ? dMixBufferL[i] : 0.0;

-		dOut[1] = (j == 0) ? dMixBufferR[i] : 0.0;

+		double dOut[2];

-		if (filterModel == FILTERMODEL_A500)

-		{

-			// A500 low-pass RC filter

-			RCLowPassFilterStereo(&filterLoA500, dOut, dOut);

+		dOut[0] = dMixBufferL[i];

+		dOut[1] = dMixBufferR[i];

-			// "LED" Sallen-Key filter

-			if (ledFilterEnabled)

-				LEDFilter(&filterLED, dOut, dOut);

+		// low-pass filter

+		RCLowPassFilterStereo(&filterLoA1200, dOut, dOut);

-			// A500 high-pass RC filter

-			RCHighPassFilterStereo(&filterHiA500, dOut, dOut);

-		}

-		else

-		{

-			// A1200 low-pass filter is ignored (we don't want it)

+		// "LED" Sallen-Key filter

+		LEDFilter(&filterLED, dOut, dOut);

-			// "LED" Sallen-Key filter

-			if (ledFilterEnabled)

-				LEDFilter(&filterLED, dOut, dOut);

+		// high-pass RC filter

+		RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);

-			// A1200 high-pass RC filter

-			RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);

-		}

+		dMixBufferL[i] = dOut[0];

+		dMixBufferR[i] = dOut[1];

+	}

+}

-		dMixL[j] = dOut[0];

-		dMixR[j] = dOut[1];

+static void processFiltersA500_NoLED(int32_t numSamples)

+{

+	for (int32_t i = 0; i < numSamples; i++)

+	{

+		double dOut[2];

+		dOut[0] = dMixBufferL[i];

+		dOut[1] = dMixBufferR[i];

+		// low-pass RC filter

+		RCLowPassFilterStereo(&filterLoA500, dOut, dOut);

+		// high-pass RC filter

+		RCHighPassFilterStereo(&filterHiA500, dOut, dOut);

+		dMixBufferL[i] = dOut[0];

+		dMixBufferR[i] = dOut[1];

+}

-#define NORMALIZE_DOWNSAMPLE 2.0

+static void processFiltersA500_LED(int32_t numSamples)

+{

+	for (int32_t i = 0; i < numSamples; i++)

+	{

+		double dOut[2];

-	// 2x "all-pass halfband" downsampling

-	dOut[0] = d2x(dMixL, dLState);

-	dOut[1] = d2x(dMixR, dRState);

+		dOut[0] = dMixBufferL[i];

+		dOut[1] = dMixBufferR[i];

-	// normalize and invert phase (A500/A1200 has a phase-inverted audio signal)

-	dOut[0] *= NORMALIZE_DOWNSAMPLE * (-INT16_MAX / (double)AMIGA_VOICES);

-	dOut[1] *= NORMALIZE_DOWNSAMPLE * (-INT16_MAX / (double)AMIGA_VOICES);

+		// low-pass RC filter

+		RCLowPassFilterStereo(&filterLoA500, dOut, dOut);

+		// "LED" Sallen-Key filter

+		LEDFilter(&filterLED, dOut, dOut);

+		// high-pass RC filter

+		RCHighPassFilterStereo(&filterHiA500, dOut, dOut);

+		dMixBufferL[i] = dOut[0];

+		dMixBufferR[i] = dOut[1];

+	}

+}

+#define NORM_FACTOR 2.0 /* nominally correct, but can clip from high-pass filter overshoot */

+static inline void processMixedSamplesAmigaPanning(int32_t i, int16_t *out)

+{

+	int32_t smp32;

+	double dPrng, dL, dR;

+	// 2x downsampling (decimation)

+	const uint32_t offset1 = (i << 1) + 0;

+	const uint32_t offset2 = (i << 1) + 1;

+	dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);

+	dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);

+	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)

+	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);

+	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);

 	// left channel - 1-bit triangular dithering (high-pass filtered)

 	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5

-	dOut[0] = (dOut[0] + dPrng) - dPrngStateL;

+	dL = (dL + dPrng) - dPrngStateL;

 	dPrngStateL = dPrng;

-	smp32 = (int32_t)dOut[0];

+	smp32 = (int32_t)dL;

 	CLAMP16(smp32);

 	out[0] = (int16_t)smp32;

 	// right channel - 1-bit triangular dithering (high-pass filtered)

 	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5

-	dOut[1] = (dOut[1] + dPrng) - dPrngStateR;

+	dR = (dR + dPrng) - dPrngStateR;

 	dPrngStateR = dPrng;

-	smp32 = (int32_t)dOut[1];

+	smp32 = (int32_t)dR;

 	CLAMP16(smp32);

 	out[1] = (int16_t)smp32;

-void outputAudio(int16_t *target, int32_t numSamples)

+static inline void processMixedSamples(int32_t i, int16_t *out)

-	int16_t out[2];

-	int32_t i;

+	int32_t smp32;

+	double dPrng, dL, dR;

+	// 2x downsampling (decimation)

+	const uint32_t offset1 = (i << 1) + 0;

+	const uint32_t offset2 = (i << 1) + 1;

+	dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);

+	dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);

+	// apply stereo separation

+	const double dOldL = dL;

+	const double dOldR = dR;

+	double dMid  = (dOldL + dOldR) * STEREO_NORM_FACTOR;

+	double dSide = (dOldL - dOldR) * dSideFactor;

+	dL = dMid + dSide;

+	dR = dMid - dSide;

+	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)

+	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);

+	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);

+	// left channel - 1-bit triangular dithering (high-pass filtered)

+	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5

+	dL = (dL + dPrng) - dPrngStateL;

+	dPrngStateL = dPrng;

+	smp32 = (int32_t)dL;

+	CLAMP16(smp32);

+	out[0] = (int16_t)smp32;

+	// right channel - 1-bit triangular dithering (high-pass filtered)

+	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5

+	dR = (dR + dPrng) - dPrngStateR;

+	dPrngStateR = dPrng;

+	smp32 = (int32_t)dR;

+	CLAMP16(smp32);

+	out[1] = (int16_t)smp32;

+}

+void outputAudio(int16_t *target, int32_t numSamples)

+{

 	if (editor.isSMPRendering)

 		// render to sample (PAT2SMP)

 		int32_t samplesTodo = numSamples;

-		if (editor.pat2SmpPos+samplesTodo > MAX_SAMPLE_LEN*2)

-			samplesTodo = (MAX_SAMPLE_LEN*2)-editor.pat2SmpPos;

+		if (editor.pat2SmpPos+samplesTodo > MAX_SAMPLE_LEN)

+			samplesTodo = MAX_SAMPLE_LEN-editor.pat2SmpPos;

-		mixChannels(samplesTodo);

+		// mix channels (at 2x rate, we do 2x oversampling)

+		mixChannels(samplesTodo*2);

 		double *dOutStream = &editor.dPat2SmpBuf[editor.pat2SmpPos];

-		for (i = 0; i < samplesTodo; i++)

-			dOutStream[i] = dMixBufferL[i] + dMixBufferR[i]; // normalized to -128..127 later

+		for (int32_t i = 0; i < samplesTodo; i++)

+		{

+			// 2x downsampling (decimation)

+			double dL, dR;

+			const uint32_t offset1 = (i << 1) + 0;

+			const uint32_t offset2 = (i << 1) + 1;

+			dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);

+			dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);

+			dOutStream[i] = (dL + dR) * 0.5; // normalized to -128..127 later

+		}

 		editor.pat2SmpPos += samplesTodo;

-		if (editor.pat2SmpPos >= MAX_SAMPLE_LEN*2)

+		if (editor.pat2SmpPos >= MAX_SAMPLE_LEN)

 			editor.smpRenderingDone = true;

 			updateWindowTitle(MOD_IS_MODIFIED);

@@ -587,18 +702,33 @@

 	else

-		// render to stream

+		// mix and filter channels (at 2x rate, we do 2x oversampling)

+		mixChannels(numSamples*2);

+		processFiltersFunc(numSamples*2);

-		mixChannels(numSamples);

+		// downsample, normalize and dither

+		int16_t out[2];

 		int16_t *outStream = target;

-		for (i = 0; i < numSamples; i++)

+		if (stereoSeparation == 100)

-			processMixedSamples(i, out);

+			for (int32_t i = 0; i < numSamples; i++)

+			{

+				processMixedSamplesAmigaPanning(i, out); // also does 2x downsampling

-			*outStream++ = out[0];

-			*outStream++ = out[1];

+				*outStream++ = out[0];

+				*outStream++ = out[1];

+			}

+		else

+		{

+			for (int32_t i = 0; i < numSamples; i++)

+			{

+				processMixedSamples(i, out); // also does 2x downsampling

+				*outStream++ = out[0];

+				*outStream++ = out[1];

+			}

+		}

@@ -648,7 +778,7 @@

 static void SDLCALL audioCallback(void *userdata, Uint8 *stream, int len)

-	if (audio.forceMixerOff) // during MOD2WAV

+	if (audio.forceSoundCardSilence) // during MOD2WAV

 		memset(stream, 0, len);

 		return;

@@ -732,127 +862,61 @@

 	** that didn't change before production (or changes that never reached production).

 	** This has been confirmed by measuring the components on several Amiga motherboards.

**

-	** Correct values for A500 (A500_R6.pdf):

-	** - RC 6dB/oct low-pass: R=360 ohm, C=0.1uF (f=4420.970Hz)

-	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF (f=3090.532Hz)

-	** - RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22.33uF (22+0.33) (f=5.127Hz)

+	** Correct values for A500, >rev3 (?) (A500_R6.pdf):

+	** - 1-pole RC 6dB/oct low-pass: R=360 ohm, C=0.1uF

+	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF

+	** - 1-pole RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22.33uF (22+0.33)

**

-	** Correct values for A1200 (A1200_R2.pdf):

-	** - RC 6dB/oct low-pass: R=680 ohm, C=6800pF (f=34419.321Hz)

-	** - Sallen-key low-pass ("LED"): Same as A500 (f=3090.532Hz)

-	** - RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22uF (f=5.204Hz)

+	** Correct values for A1200, all revs (A1200_R2.pdf):

+	** - 1-pole RC 6dB/oct low-pass: R=680 ohm, C=6800pF

+	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF (same as A500)

+	** - 1-pole RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22uF

*/

-	// we run the filters at twice the frequency for improved precision (zero-padding)

-	const uint32_t audioFreq = audio.outputRate * 2;

+	const double dAudioFreq = audio.outputRate * 2.0; // *2 because we do 2x oversampling

 	double R, C, R1, R2, C1, C2, fc, fb;

-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted

-	/*

-	** 8bitbubsy:

-	** Hackish low-pass cutoff compensation to better match Amiga 500 when

-	** we use "lower" audio output rates. This has been loosely hand-picked

-	** after looking at many frequency analyses on a sine-sweep test module

-	** rendered on 7 different Amiga 500 machines (and taking the average).

-	** Don't try to make sense of this magic constant, and it should only be

-	** used within this very specific application!

-	**

-	** The reason we want this bias is because our digital RC filter is not

-	** that precise at lower audio output rates. It would otherwise lead to a

-	** slight unwanted cut of treble near the cutoff we aim for. It was easily

-	** audible, and especially visible on a plotted frequency spectrum.

-	**

-	** 1100Hz is the magic value I found that seems to be good. Higher than that

-	** would allow too much treble to pass.

-	**

-	** Scaling it like this is 'acceptable' (confirmed with further frequency analyses

-	** at output rates of 48, 96 and 192).

-	*/

-	double dLPCutoffBias = 1100.0 * (44100.0 / audio.outputRate);

 	// A500 1-pole (6db/oct) static RC low-pass filter:

-	R = 360.0; // R321 (360 ohm resistor)

-	C = 1e-7;  // C321 (0.1uF capacitor)

-	fc = (1.0 / (2.0 * pi * R * C)) + dLPCutoffBias;

-	calcRCFilterCoeffs(audioFreq, fc, &filterLoA500);

-	/*

-	** 8bitbubsy:

-	** We don't handle Amiga 1200's ~34kHz low-pass filter as it's not really

-	** needed. The reason it was still present in the A1200 (despite its high

-	** non-audible cutoff) was to filter away high-frequency noise from Paula's

-	** PWM (volume modulation). We don't do PWM for volume in the PT2 clone.

-	*/

+	R = 360.0; // R321 (360 ohm)

+	C = 1e-7;  // C321 (0.1uF)

+	fc = (1.0 / (PT2_TWO_PI * R * C)); // cutoff = ~4420.97Hz

+	calcRCFilterCoeffs(dAudioFreq, fc, &filterLoA500);

-	// Sallen-Key filter ("LED" filter, same RC values on A500 and A1200):

-	R1 = 10000.0; // R322 (10K ohm resistor)

-	R2 = 10000.0; // R323 (10K ohm resistor)

-	C1 = 6.8e-9;  // C322 (6800pF capacitor)

-	C2 = 3.9e-9;  // C323 (3900pF capacitor)

-	fc = 1.0 / (2.0 * pi * sqrt(R1 * R2 * C1 * C2));

-	fb = 0.125; // Fb = 0.125 : Q ~= 1/sqrt(2)

-	calcLEDFilterCoeffs(audioFreq, fc, fb, &filterLED);

+	// A1200 1-pole (6db/oct) static RC low-pass filter:

+	R = 680.0;  // R321 (680 ohm)

+	C = 6.8e-9; // C321 (6800pF)

+	fc = (1.0 / (PT2_TWO_PI * R * C)); // cutoff = ~34419.32Hz

+	calcRCFilterCoeffs(dAudioFreq, fc, &filterLoA1200);

+	// Sallen-Key filter ("LED" filter, same values on A500/A1200):

+	R1 = 10000.0; // R322 (10K ohm)

+	R2 = 10000.0; // R323 (10K ohm)

+	C1 = 6.8e-9;  // C322 (6800pF)

+	C2 = 3.9e-9;  // C323 (3900pF)

+	fc = 1.0 / (PT2_TWO_PI * pt2_sqrt(R1 * R2 * C1 * C2)); // cutoff = ~3090.53Hz

+	fb = 0.125/2.0; // Fb = 0.125 : Q ~= 1/sqrt(2) (Butterworth) (8bb: was 0.125, but /2 gives a closer gain!)

+	calcLEDFilterCoeffs(dAudioFreq, fc, fb, &filterLED);

 	// A500 1-pole (6dB/oct) static RC high-pass filter:

-	R = 1390.0; // R324 (1K ohm resistor) + R325 (390 ohm resistor)

-	C = 2.233e-5; // C334 (22uF capacitor) + C335 (0.33uF capacitor)

-	fc = 1.0 / (2.0 * pi * R * C);

-	calcRCFilterCoeffs(audioFreq, fc, &filterHiA500);

+	R = 1390.0;   // R324 (1K ohm) + R325 (390 ohm)

+	C = 2.233e-5; // C334 (22uF) + C335 (0.33uF)

+	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~5.13Hz

+	calcRCFilterCoeffs(dAudioFreq, fc, &filterHiA500);

 	// A1200 1-pole (6dB/oct) static RC high-pass filter:

 	R = 1390.0; // R324 (1K ohm resistor) + R325 (390 ohm resistor)

 	C = 2.2e-5; // C334 (22uF capacitor)

-	fc = 1.0 / (2.0 * pi * R * C);

-	calcRCFilterCoeffs(audioFreq, fc, &filterHiA1200);

+	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~5.20Hz

+	calcRCFilterCoeffs(dAudioFreq, fc, &filterHiA1200);

-void recalcFilterCoeffs(int32_t outputRate) // for MOD2WAV

+void mixerSetStereoSeparation(uint8_t percentage) // 0..100 (percentage)

-	const bool audioWasntLocked = !audio.locked;

-	if (audioWasntLocked)

-		lockAudio();

+	assert(percentage <= 100);

-	const int32_t oldOutputRate = audio.outputRate;

-	audio.outputRate = outputRate;

-	clearRCFilterState(&filterLoA500);

-	clearRCFilterState(&filterHiA500);

-	clearRCFilterState(&filterHiA1200);

-	clearLEDFilterState(&filterLED);

-	calculateFilterCoeffs();

-	audio.outputRate = oldOutputRate;

-	if (audioWasntLocked)

-		unlockAudio();

+	stereoSeparation = percentage;

+	dSideFactor = (percentage / 100.0) * STEREO_NORM_FACTOR;

-static void setVoicePan(int32_t ch, double pan) // pan = 0.0 .. 1.0

-{

-	// constant power panning

-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted

-	paula[ch].dPanL = cos(pan * pi * 0.5) * sqrt(2.0);

-	paula[ch].dPanR = sin(pan * pi * 0.5) * sqrt(2.0);

-}

-void mixerCalcVoicePans(uint8_t stereoSeparation) // 0..100 (percentage)

-{

-	assert(stereoSeparation <= 100);

-	const double panMid = 0.5;

-	const double panR = panMid + (stereoSeparation / (100.0 * 2.0));

-	const double panL = 1.0 - panR;

-	setVoicePan(0, panL);

-	setVoicePan(1, panR);

-	setVoicePan(2, panR);

-	setVoicePan(3, panL);

-}

 static double ciaBpm2Hz(int32_t bpm)

 	if (bpm == 0)

@@ -873,17 +937,15 @@

 		else

 			dBpmHz = ciaBpm2Hz(bpm);

-		const double dSamplesPerTick = audio.outputRate / dBpmHz;

-		const double dSamplesPerTick28kHz = PAT2SMP_HI_FREQ / dBpmHz; // PAT2SMP hi quality

-		const double dSamplesPerTick22kHz = PAT2SMP_LO_FREQ / dBpmHz; // PAT2SMP low quality

-		const double dSamplesPerTickMod2Wav = MOD2WAV_FREQ / dBpmHz; // MOD2WAV

+		const double dSamplesPerTick      = audio.outputRate / dBpmHz;

+		const double dSamplesPerTick28kHz = PAT2SMP_HI_FREQ  / dBpmHz; // PAT2SMP hi quality

+		const double dSamplesPerTick20kHz = PAT2SMP_LO_FREQ  / dBpmHz; // PAT2SMP low quality

 		// convert to rounded 32.32 fixed-point

-		const int32_t i = bpm-32;

-		audio.bpmTable[i] = (int64_t)((dSamplesPerTick * (UINT32_MAX+1.0)) + 0.5);

+		const int32_t i = bpm - 32;

+		audio.bpmTable[i]      = (int64_t)((dSamplesPerTick      * (UINT32_MAX+1.0)) + 0.5);

 		audio.bpmTable28kHz[i] = (int64_t)((dSamplesPerTick28kHz * (UINT32_MAX+1.0)) + 0.5);

-		audio.bpmTable22kHz[i] = (int64_t)((dSamplesPerTick22kHz * (UINT32_MAX+1.0)) + 0.5);

-		audio.bpmTableMod2Wav[i] = (int64_t)((dSamplesPerTickMod2Wav * (UINT32_MAX+1.0)) + 0.5);

+		audio.bpmTable20kHz[i] = (int64_t)((dSamplesPerTick20kHz * (UINT32_MAX+1.0)) + 0.5);

@@ -941,9 +1003,11 @@

 		return false;

-	if (have.freq < 32000) // lower than this is not safe for the BLEP synthesis in the mixer

+	// lower than this is not safe for the BLEP synthesis in the mixer

+	const int32_t minFreq = (int32_t)(PAULA_PAL_CLK / 113.0 / 2.0)+1; // /2 because we do 2x oversampling

+	if (have.freq < minFreq)

-		showErrorMsgBox("Unable to open audio: An audio rate below 32kHz can't be used!");

+		showErrorMsgBox("Unable to open audio: An audio rate below %dHz can't be used!", minFreq);

 		return false;

@@ -960,15 +1024,13 @@

 	updateReplayerTimingMode();

 	const int32_t lowestBPM = 32;

-	const int32_t pat2SmpMaxSamples = (audio.bpmTable22kHz[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)

-	const int32_t mod2WavMaxSamples = (audio.bpmTableMod2Wav[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)

+	const int32_t pat2SmpMaxSamples = (audio.bpmTable20kHz[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)

 	const int32_t renderMaxSamples = (audio.bpmTable[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)

+	const int32_t maxSamplesToMix = MAX(pat2SmpMaxSamples, renderMaxSamples) * 2; // *2 for headroom (XXX: buggy code somewhere?)

-	const int32_t maxSamplesToMix = MAX(pat2SmpMaxSamples, MAX(mod2WavMaxSamples, renderMaxSamples));

+	dMixBufferLUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double), 256);

+	dMixBufferRUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double), 256);

-	dMixBufferLUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double) * 8, 256);

-	dMixBufferRUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double) * 8, 256);

 	if (dMixBufferLUnaligned == NULL || dMixBufferRUnaligned == NULL)

 		showErrorMsgBox("Out of memory!");

@@ -978,8 +1040,7 @@

 	dMixBufferL = (double *)ALIGN_PTR(dMixBufferLUnaligned, 256);

 	dMixBufferR = (double *)ALIGN_PTR(dMixBufferRUnaligned, 256);

-	mixerCalcVoicePans(config.stereoSeparation);

-	defStereoSep = config.stereoSeparation;

+	mixerSetStereoSeparation(config.stereoSeparation);

 	filterModel = config.filterModel;

 	ledFilterEnabled = false;

@@ -991,8 +1052,10 @@

 	calcAudioLatencyVars(audio.audioBufferSize, audio.outputRate);

 	resetCachedMixerPeriod();

-	resetAudioDownsamplingStates();

+	clearMixerDownsamplerStates();

 	audio.resetSyncTickTimeFlag = true;

+	updateFilterFunc();

 	SDL_PauseAudioDevice(dev, false);

 	return true;

@@ -1028,12 +1091,12 @@

 	amigaPanFlag ^= 1;

 	if (!amigaPanFlag)

-		mixerCalcVoicePans(defStereoSep);

+		mixerSetStereoSeparation(config.stereoSeparation);

 		displayMsg("AMIGA PANNING OFF");

 	else

-		mixerCalcVoicePans(100);

+		mixerSetStereoSeparation(100);

 		displayMsg("AMIGA PANNING ON");

--- a/src/pt2_audio.h

+++ b/src/pt2_audio.h

@@ -8,11 +8,11 @@

 	volatile bool locked, isSampling;

-	bool forceMixerOff;

+	bool forceSoundCardSilence;

 	uint32_t outputRate, audioBufferSize;

 	int64_t tickSampleCounter64, samplesPerTick64;

-	int64_t bpmTable[256-32], bpmTable28kHz[256-32], bpmTable22kHz[256-32], bpmTableMod2Wav[256-32]; // 32.32 fixed-point

+	int64_t bpmTable[256-32], bpmTable28kHz[256-32], bpmTable20kHz[256-32]; // 32.32 fixed-point

 	double dPeriodToDeltaDiv;

 	// for audio sampling

@@ -29,7 +29,7 @@

 	const int8_t *data, *newData;

 	int32_t length, newLength, pos;

-	double dVolume, dDelta, dDeltaMul, dPhase, dLastDelta, dLastDeltaMul, dLastPhase, dPanL, dPanR;

+	double dVolume, dDelta, dDeltaMul, dPhase, dLastDelta, dLastDeltaMul, dLastPhase;

 	// period cache

 	int32_t oldPeriod;

@@ -58,7 +58,6 @@

 void normalizeFloatTo8Bit(float *fSampleData, uint32_t sampleLength);

 void normalizeDoubleTo8Bit(double *dSampleData, uint32_t sampleLength);

-void recalcFilterCoeffs(int32_t outputRate); // for MOD2WAV

 void setLEDFilter(bool state, bool doLockAudio);

 void toggleLEDFilter(void);

 void toggleAmigaPanMode(void);

@@ -74,9 +73,8 @@

 void mixerUpdateLoops(void);

 void mixerKillVoice(int32_t ch);

 void turnOffVoices(void);

-void mixerCalcVoicePans(uint8_t stereoSeparation);

+void mixerSetStereoSeparation(uint8_t percentage);

 void outputAudio(int16_t *target, int32_t numSamples);

-void resetAudioDownsamplingStates(void);

 extern audio_t audio; // pt2_audio.c

 extern paulaVoice_t paula[AMIGA_VOICES]; // pt2_audio.c

--- a/src/pt2_chordmaker.c

+++ b/src/pt2_chordmaker.c

@@ -164,7 +164,7 @@

 	s->text[21] = '!'; // chord sample indicator

 	s->text[22] = '\0';

-	memset(mixCh, 0, sizeof (mixCh));

+	memset(mixCh, 0, sizeof (mixCh)); // also clears position and frac

 	// setup mixing lengths and deltas

--- a/src/pt2_downsample2x.c

+++ b/src/pt2_downsample2x.c

@@ -3,82 +3,167 @@

 #include <crtdbg.h>

 #endif

+/* High-quality /2 decimator from

+** https://www.musicdsp.org/en/latest/Filters/231-hiqh-quality-2-decimators.html

+*/

 #include <stdint.h>

 #include <stdbool.h>

-#include "pt2_helpers.h" // CLAMP

+#include <math.h> // round()

+#include "pt2_helpers.h" // ABS()

-static double state[2];

+// ----------------------------------------------------------

+// reserved for main audio channel mixer, PAT2SMP and MOD2WAV

+// ----------------------------------------------------------

-/*

-** - all-pass halfband filters (2x downsample) -

-**

-** 8bitbubsy: Not sure who coded these. Possibly aciddose,

-** or maybe he found it on the internet somewhere...

-*/

+static double R1_L, R2_L, R3_L, R4_L, R5_L, R6_L, R7_L, R8_L, R9_L;

+static double R1_R, R2_R, R3_R, R4_R, R5_R, R6_R, R7_R, R8_R, R9_R;

-static double f(const double in, double *b, const double c)

+void clearMixerDownsamplerStates(void)

-	const double x = (in - *b) * c;

-	const double out = *b + x;

-	*b = in + x;

+	R1_L = R2_L = R3_L = R4_L = R5_L = R6_L = R7_L = R8_L = R9_L = 0.0;

+	R1_R = R2_R = R3_R = R4_R = R5_R = R6_R = R7_R = R8_R = R9_R = 0.0;

+}

-	return out;

+double decimate2x_L(double x0, double x1)

+{

+	const double h0 =  8192.0 / 16384.0;

+	const double h1 =  5042.0 / 16384.0;

+	const double h3 = -1277.0 / 16384.0;

+	const double h5 =   429.0 / 16384.0;

+	const double h7 =  -116.0 / 16384.0;

+	const double h9 =    18.0 / 16384.0;

+	double h9x0 = h9*x0;

+	double h7x0 = h7*x0;

+	double h5x0 = h5*x0;

+	double h3x0 = h3*x0;

+	double h1x0 = h1*x0;

+	double R10  = R9_L+h9x0;

+	R9_L = R8_L+h7x0;

+	R8_L = R7_L+h5x0;

+	R7_L = R6_L+h3x0;

+	R6_L = R5_L+h1x0;

+	R5_L = R4_L+h1x0+h0*x1;

+	R4_L = R3_L+h3x0;

+	R3_L = R2_L+h5x0;

+	R2_L = R1_L+h7x0;

+	R1_L = h9x0;

+	return R10;

-double d2x(const double *input, double *b)

+double decimate2x_R(double x0, double x1)

-	return (f(input[0], &b[0], 0.150634765625) + f(input[1], &b[1], -0.3925628662109375)) * 0.5;

+	const double h0 =  8192.0 / 16384.0;

+	const double h1 =  5042.0 / 16384.0;

+	const double h3 = -1277.0 / 16384.0;

+	const double h5 =   429.0 / 16384.0;

+	const double h7 =  -116.0 / 16384.0;

+	const double h9 =    18.0 / 16384.0;

+	double h9x0 = h9*x0;

+	double h7x0 = h7*x0;

+	double h5x0 = h5*x0;

+	double h3x0 = h3*x0;

+	double h1x0 = h1*x0;

+	double R10  = R9_R+h9x0;

+	R9_R = R8_R+h7x0;

+	R8_R = R7_R+h5x0;

+	R7_R = R6_R+h3x0;

+	R6_R = R5_R+h1x0;

+	R5_R = R4_R+h1x0+h0*x1;

+	R4_R = R3_R+h3x0;

+	R3_R = R2_R+h5x0;

+	R2_R = R1_R+h7x0;

+	R1_R = h9x0;

+	return R10;

+// ----------------------------------------------------------

+// ----------------------------------------------------------

+// ----------------------------------------------------------

+static double R1, R2, R3, R4, R5, R6, R7, R8, R9;

+static void clearDownsamplerState(void)

+{

+	R1 = R2 = R3 = R4 = R5 = R6 = R7 = R8 = R9 = 0.0;

+}

+static double decimate2x(double x0, double x1)

+{

+	const double h0 =  8192.0 / 16384.0;

+	const double h1 =  5042.0 / 16384.0;

+	const double h3 = -1277.0 / 16384.0;

+	const double h5 =   429.0 / 16384.0;

+	const double h7 =  -116.0 / 16384.0;

+	const double h9 =    18.0 / 16384.0;

+	double h9x0 = h9*x0;

+	double h7x0 = h7*x0;

+	double h5x0 = h5*x0;

+	double h3x0 = h3*x0;

+	double h1x0 = h1*x0;

+	double R10  = R9+h9x0;

+	R9 = R8+h7x0;

+	R8 = R7+h5x0;

+	R7 = R6+h3x0;

+	R6 = R5+h1x0;

+	R5 = R4+h1x0+h0*x1;

+	R4 = R3+h3x0;

+	R3 = R2+h5x0;

+	R2 = R1+h7x0;

+	R1 = h9x0;

+	return R10;

+}

 // Warning: These can exceed original range because of undershoot/overshoot!

-void downsample2xDouble(double *buffer, int32_t originalLength)

+void downsample2xDouble(double *buffer, uint32_t originalLength)

-	state[0] = state[1] = 0.0;

+	clearDownsamplerState();

 	const double *input = buffer;

-	const int32_t length = originalLength / 2;

-	for (int32_t i = 0; i < length; i++, input += 2)

-		buffer[i] = d2x(input, state);

+	const uint32_t length = originalLength / 2;

+	for (uint32_t i = 0; i < length; i++, input += 2)

+		buffer[i] = decimate2x(input[0], input[1]);

-void downsample2xFloat(float *buffer, int32_t originalLength)

+void downsample2xFloat(float *buffer, uint32_t originalLength)

-	double in[2];

+	clearDownsamplerState();

-	state[0] = state[1] = 0.0;

 	const float *input = buffer;

-	const int32_t length = originalLength / 2;

-	for (int32_t i = 0; i < length; i++, input += 2)

-	{

-		in[0] = input[0];

-		in[1] = input[1];

-		buffer[i] = (float)d2x(in, state);

-	}

+	const uint32_t length = originalLength / 2;

+	for (uint32_t i = 0; i < length; i++, input += 2)

+		buffer[i] = (float)decimate2x(input[0], input[1]);

 // Warning: These are slow and use normalization to prevent clipping from undershoot/overshoot!

-bool downsample2x8BitU(uint8_t *buffer, int32_t originalLength)

+bool downsample2x8BitU(uint8_t *buffer, uint32_t originalLength)

-	state[0] = state[1] = 0.0;

 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));

 	if (dBuffer == NULL)

 		return false;

-	for (int32_t i = 0; i < originalLength; i++)

+	for (uint32_t i = 0; i < originalLength; i++)

 		dBuffer[i] = (buffer[i] - 128) * (1.0 / (INT8_MAX+1.0));

 	const double *input = dBuffer;

 	double dPeak = 0.0;

-	const int32_t length = originalLength / 2;

-	for (int32_t i = 0; i < length; i++, input += 2)

+	clearDownsamplerState();

+	const uint32_t length = originalLength / 2;

+	for (uint32_t i = 0; i < length; i++, input += 2)

-		double dOut = d2x(input, state);

+		double dOut = decimate2x(input[0], input[1]);

 		dBuffer[i] = dOut;

 		dOut = ABS(dOut);

@@ -92,32 +177,39 @@

 	if (dPeak > 0.0)

 		dAmp = INT8_MAX / dPeak;

-	for (int32_t i = 0; i < length; i++)

-		buffer[i] = (uint8_t)round(dBuffer[i] * dAmp) + 128;

+	for (uint32_t i = 0; i < length; i++)

+	{

+		double dSmp = dBuffer[i] * dAmp;

+		// faster than calling round()

+		     if (dSmp < 0.0) dSmp -= 0.5;

+		else if (dSmp > 0.0) dSmp += 0.5;

+		buffer[i] = (uint8_t)dSmp + 128;

+	}

 	free(dBuffer);

 	return true;

-bool downsample2x8Bit(int8_t *buffer, int32_t originalLength)

+bool downsample2x8Bit(int8_t *buffer, uint32_t originalLength)

-	state[0] = state[1] = 0.0;

 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));

 	if (dBuffer == NULL)

 		return false;

-	for (int32_t i = 0; i < originalLength; i++)

+	for (uint32_t i = 0; i < originalLength; i++)

 		dBuffer[i] = buffer[i] * (1.0 / (INT8_MAX+1.0));

 	const double *input = dBuffer;

 	double dPeak = 0.0;

-	const int32_t length = originalLength / 2;

-	for (int32_t i = 0; i < length; i++, input += 2)

+	clearDownsamplerState();

+	const uint32_t length = originalLength / 2;

+	for (uint32_t i = 0; i < length; i++, input += 2)

-		double dOut = d2x(input, state);

+		double dOut = decimate2x(input[0], input[1]);

 		dBuffer[i] = dOut;

 		dOut = ABS(dOut);

@@ -131,32 +223,39 @@

 	if (dPeak > 0.0)

 		dAmp = INT8_MAX / dPeak;

-	for (int32_t i = 0; i < length; i++)

-		buffer[i] = (int8_t)round(dBuffer[i] * dAmp);

+	for (uint32_t i = 0; i < length; i++)

+	{

+		double dSmp = dBuffer[i] * dAmp;

+		// faster than calling round()

+		     if (dSmp < 0.0) dSmp -= 0.5;

+		else if (dSmp > 0.0) dSmp += 0.5;

+		buffer[i] = (int8_t)dSmp;

+	}

 	free(dBuffer);

 	return true;

-bool downsample2x16Bit(int16_t *buffer, int32_t originalLength)

+bool downsample2x16Bit(int16_t *buffer, uint32_t originalLength)

-	state[0] = state[1] = 0.0;

 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));

 	if (dBuffer == NULL)

 		return false;

-	for (int32_t i = 0; i < originalLength; i++)

+	for (uint32_t i = 0; i < originalLength; i++)

 		dBuffer[i] = buffer[i] * (1.0 / (INT16_MAX+1.0));

 	const double *input = dBuffer;

 	double dPeak = 0.0;

-	const int32_t length = originalLength / 2;

-	for (int32_t i = 0; i < length; i++, input += 2)

+	clearDownsamplerState();

+	const uint32_t length = originalLength / 2;

+	for (uint32_t i = 0; i < length; i++, input += 2)

-		double dOut = d2x(input, state);

+		double dOut = decimate2x(input[0], input[1]);

 		dBuffer[i] = dOut;

 		dOut = ABS(dOut);

@@ -170,32 +269,39 @@

 	if (dPeak > 0.0)

 		dAmp = INT16_MAX / dPeak;

-	for (int32_t i = 0; i < length; i++)

-		buffer[i] = (int16_t)round(dBuffer[i] * dAmp);

+	for (uint32_t i = 0; i < length; i++)

+	{

+		double dSmp = dBuffer[i] * dAmp;

+		// faster than calling round()

+		     if (dSmp < 0.0) dSmp -= 0.5;

+		else if (dSmp > 0.0) dSmp += 0.5;

+		buffer[i] = (int16_t)dSmp;

+	}

 	free(dBuffer);

 	return true;

-bool downsample2x32Bit(int32_t *buffer, int32_t originalLength)

+bool downsample2x32Bit(int32_t *buffer, uint32_t originalLength)

-	state[0] = state[1] = 0.0;

 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));

 	if (dBuffer == NULL)

 		return false;

-	for (int32_t i = 0; i < originalLength; i++)

+	for (uint32_t i = 0; i < originalLength; i++)

 		dBuffer[i] = buffer[i] * (1.0 / (INT32_MAX+1.0));

 	const double *input = dBuffer;

 	double dPeak = 0.0;

-	const int32_t length = originalLength / 2;

-	for (int32_t i = 0; i < length; i++, input += 2)

+	clearDownsamplerState();

+	const uint32_t length = originalLength / 2;

+	for (uint32_t i = 0; i < length; i++, input += 2)

-		double dOut = d2x(input, state);

+		double dOut = decimate2x(input[0], input[1]);

 		dBuffer[i] = dOut;

 		dOut = ABS(dOut);

@@ -209,8 +315,16 @@

 	if (dPeak > 0.0)

 		dAmp = INT32_MAX / dPeak;

-	for (int32_t i = 0; i < length; i++)

-		buffer[i] = (int32_t)round(dBuffer[i] * dAmp);

+	for (uint32_t i = 0; i < length; i++)

+	{

+		double dSmp = dBuffer[i] * dAmp;

+		// faster than calling round()

+		     if (dSmp < 0.0) dSmp -= 0.5;

+		else if (dSmp > 0.0) dSmp += 0.5;

+		buffer[i] = (int32_t)dSmp;

+	}

 	free(dBuffer);

--- a/src/pt2_downsamplers2x.h

+++ b/src/pt2_downsamplers2x.h

@@ -2,18 +2,18 @@

 #include <stdint.h>

-// all-pass halfband filters

+// reserved for main audio channel mixer, PAT2SMP and MOD2WAV

+void clearMixerDownsamplerStates(void);

+double decimate2x_L(double x0, double x1);

+double decimate2x_R(double x0, double x1);

+// --------------------------------------

-double d2x(const double *input, double *b);

 // Warning: These can exceed -1.0 .. 1.0 because of undershoot/overshoot!

+void downsample2xFloat(float *buffer, uint32_t originalLength);

+void downsample2xDouble(double *buffer, uint32_t originalLength);

-void downsample2xFloat(float *buffer, int32_t originalLength);

-void downsample2xDouble(double *buffer, int32_t originalLength);

 // Warning: These are slow and use normalization to prevent clipping from undershoot/overshoot!

-void downsample2x8Bit(int8_t *buffer, int32_t originalLength);

-void downsample2x8BitU(uint8_t *buffer, int32_t originalLength);

-void downsample2x16Bit(int16_t *buffer, int32_t originalLength);

-void downsample2x32Bit(int32_t *buffer, int32_t originalLength);

+void downsample2x8Bit(int8_t *buffer, uint32_t originalLength);

+void downsample2x8BitU(uint8_t *buffer, uint32_t originalLength);

+void downsample2x16Bit(int16_t *buffer, uint32_t originalLength);

+void downsample2x32Bit(int32_t *buffer, uint32_t originalLength);

--- a/src/pt2_header.h

+++ b/src/pt2_header.h

@@ -14,7 +14,7 @@

 #include "pt2_unicode.h"

 #include "pt2_palette.h"

-#define PROG_VER_STR "1.32"

+#define PROG_VER_STR "1.33"

 #ifdef _WIN32

 #define DIR_DELIMITER '\\'

--- a/src/pt2_ledfilter.c

+++ b/src/pt2_ledfilter.c

@@ -1,8 +1,3 @@

-#include <stdint.h>

-#include <math.h>

-#include "pt2_rcfilter.h" // DENORMAL_OFFSET definition

-#include "pt2_ledfilter.h"

 /* aciddose:

 ** Imperfect Amiga "LED" filter implementation. This may be further improved in the future.

 ** Based upon ideas posted by mystran @ the kvraudio.com forum.

@@ -10,12 +5,14 @@

 ** This filter may not function correctly used outside the fixed-cutoff context here!

*/

+#include <stdint.h>

+#include "pt2_math.h"

+#include "pt2_ledfilter.h"

 void clearLEDFilterState(ledFilter_t *filterLED)

-	filterLED->buffer[0] = 0.0; // left channel

-	filterLED->buffer[1] = 0.0;

-	filterLED->buffer[2] = 0.0; // right channel

-	filterLED->buffer[3] = 0.0;

+	filterLED->buffer[0] = filterLED->buffer[1] = 0.0; // left channel

+	filterLED->buffer[2] = filterLED->buffer[3] = 0.0; // right channel

 static double sigmoid(double x, double coefficient)

@@ -30,14 +27,8 @@

 void calcLEDFilterCoeffs(const double sr, const double hz, const double fb, ledFilter_t *filter)

-	/* aciddose:

-	** tan() may produce NaN or other bad results in some cases!

-	** It appears to work correctly with these specific coefficients.

-	*/

-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted

-	const double c = (hz < (sr / 2.0)) ? tan((pi * hz) / sr) : 1.0;

+	// 8bitbubsy: the tangent approximation is suitable for these input ranges

+	const double c = (hz < sr/2.0) ? pt2_tan((PT2_PI * hz) / sr) : 1.0;

 	const double g = 1.0 / (1.0 + c);

 	// aciddose: dirty compensation

--- a/src/pt2_main.c

+++ b/src/pt2_main.c

@@ -233,7 +233,7 @@

 	makeSureDirIsProgramDir();

 #endif

-	if (!initializeVars())

+	if (!initializeVars() || !initKaiserTable())

 		cleanUp();

 		SDL_Quit();

@@ -899,6 +899,7 @@

 	videoClose();

 	freeSprites();

 	freeAudioDeviceList(); // pt2_sampling.c

+	freeKaiserTable(); // pt2_sampling.c

 	if (config.defModulesDir != NULL) free(config.defModulesDir);

 	if (config.defSamplesDir != NULL) free(config.defSamplesDir);

--- /dev/null

+++ b/src/pt2_math.c

@@ -1,0 +1,92 @@

+/* Quite accurate approximation routines for sin/cos/sqrt/tan.

+** These should not be used in realtime, as they are too slow.

+*/

+#include <stdint.h>

+#include <stdbool.h>

+#include <math.h>

+#include "pt2_math.h"

+static const double pi = PT2_PI;

+static const double twopi = PT2_TWO_PI;

+static const double four_over_pi = 4.0 / PT2_PI;

+static const double threehalfpi = (3.0 * PT2_PI) / 2.0;

+static const double halfpi = PT2_PI / 2.0;

+static double tan_14s(double x) // rational approximation x*P(x^2)/Q(x^2); pt2_tan() passes an octant-reduced argument scaled by 4/pi

+{

+	const double c1 = -34287.4662577359568109624; // c1..c3: numerator polynomial coefficients

+	const double c2 =   2566.7175462315050423295;

+	const double c3 = -   26.5366371951731325438;

+	const double c4 = -43656.1579281292375769579; // c4..c6: denominator polynomial coefficients (leading coefficient is 1)

+	const double c5 =  12244.4839556747426927793;

+	const double c6 = -  336.611376245464339493;

+	double x2 = x * x; // both polynomials are evaluated in x^2, in nested (Horner) form

+	return x*(c1 + x2*(c2 + x2*c3))/(c4 + x2*(c5 + x2*(c6 + x2)));

+}

+double pt2_tan(double x) // tangent approximation: picks the pi/4-wide octant of x, then evaluates tan_14s() on a reduced argument

+{

+	x = fmod(x, twopi); // wrap into (-2pi, 2pi); fmod() keeps the sign of x

+	const int32_t octant = (int32_t)(x * four_over_pi); // octant index; the cast truncates toward zero

+	switch (octant)

+	{

+		default: // NOTE(review): negative octants (x <= -pi/4) end up here and reuse case 0 with an argument outside [-1, 1] - confirm callers only pass x >= 0

+		case 0: return      tan_14s(x               * four_over_pi);

+		case 1: return  1.0/tan_14s((halfpi-x)      * four_over_pi); // tan(x) = 1/tan(pi/2 - x)

+		case 2: return -1.0/tan_14s((x-halfpi)      * four_over_pi); // tan(x) = -1/tan(x - pi/2)

+		case 3: return     -tan_14s((pi-x)          * four_over_pi); // tan(x) = -tan(pi - x)

+		case 4: return      tan_14s((x-pi)          * four_over_pi); // tan has period pi

+		case 5: return  1.0/tan_14s((threehalfpi-x) * four_over_pi);

+		case 6: return -1.0/tan_14s((x-threehalfpi) * four_over_pi);

+		case 7: return     -tan_14s((twopi-x)       * four_over_pi);

+	}

+}

+double pt2_sqrt(double x) // square root by repeatedly averaging the estimate with x/estimate until it stops changing

+{

+	double number = x;

+	double s = number / 2.5; // initial estimate

+	double old = 0.0; // previous estimate; for x == 0, s == old right away and 0 is returned without iterating

+	while (s != old) // NOTE(review): never false for NaN input, and may never become false for negative x - confirm callers only pass x >= 0

+	{

+		old = s;

+		s = (number / old + old) / 2.0; // mean of the estimate and number/estimate

+	}

+	return s;

+}

+static double cosTaylorSeries(double x) // cosine as the power series sum of (-1)^k * x^(2k) / (2k)!, k = 0..ITERATIONS

+{

+#define ITERATIONS 32 /* good enough... */

+	x = fmod(x, twopi); // range-reduce to (-2pi, 2pi)

+	if (x < 0.0) // cosine is even, so continue with |x|

+		x = -x;

+	double tmp = 1.0; // current series term, starting with the k = 0 term

+	double sum = 1.0; // running sum of the terms

+	for (double i = 2.0; i <= ITERATIONS*2.0; i += 2.0) // i = 2k for k = 1..ITERATIONS

+	{

+		tmp *= -(x*x) / (i * (i-1.0)); // next term = previous term * -x^2 / ((2k)(2k-1))

+		sum += tmp;

+	}

+	return sum;

+}

+double pt2_cos(double x) // public cosine entry point

+{

+	return cosTaylorSeries(x); // forwards directly to the series evaluation

+}

+double pt2_sin(double x) // public sine entry point, built on the cosine series

+{

+	return cosTaylorSeries(halfpi-x); // sin(x) = cos(pi/2 - x)

+}

--- /dev/null

+++ b/src/pt2_math.h

@@ -1,0 +1,15 @@

+#pragma once

+#include <stdint.h>

+#include <stdbool.h>

+// adding this prevents denormalized numbers, which is slow

+#define DENORMAL_OFFSET 1e-20

+#define PT2_PI 3.14159265358979323846264338327950288

+#define PT2_TWO_PI 6.28318530717958647692528676655900576

+double pt2_sqrt(double x); // iterative square-root approximation (loops until the estimate stops changing)

+double pt2_tan(double x); // tangent via octant reduction and a rational approximation

+double pt2_cos(double x); // cosine via a fixed-length power series

+double pt2_sin(double x); // sine, evaluated as the cosine series at pi/2 - x

--- a/src/pt2_mod2wav.c

+++ b/src/pt2_mod2wav.c

@@ -14,6 +14,7 @@

 #include "pt2_visuals.h"

 #include "pt2_mod2wav.h"

 #include "pt2_structs.h"

+#include "pt2_downsamplers2x.h"

 #define TICKS_PER_RENDER_CHUNK 64

@@ -36,13 +37,12 @@

 	// skip wav header place, render data first

 	fseek(f, sizeof (wavHeader_t), SEEK_SET);

-	if (MOD2WAV_FREQ != audio.outputRate)

-		recalcFilterCoeffs(MOD2WAV_FREQ);

 	uint32_t sampleCounter = 0;

 	uint8_t tickCounter = 8;

 	int64_t tickSampleCounter64 = 0;

+	clearMixerDownsamplerStates();

 	bool renderDone = false;

 	while (!renderDone)

@@ -89,9 +89,6 @@

 			fwrite(mod2WavBuffer, sizeof (int16_t), samplesInChunk, f);

-	if (MOD2WAV_FREQ != audio.outputRate)

-		recalcFilterCoeffs(audio.outputRate);

 	free(mod2WavBuffer);

 	if (sampleCounter & 1)

@@ -109,7 +106,7 @@

 	wavHeader.subchunk1Size = 16;

 	wavHeader.audioFormat = 1;

 	wavHeader.numChannels = 2;

-	wavHeader.sampleRate = MOD2WAV_FREQ;

+	wavHeader.sampleRate = audio.outputRate;

 	wavHeader.bitsPerSample = 16;

 	wavHeader.byteRate = (wavHeader.sampleRate * wavHeader.numChannels * wavHeader.bitsPerSample) / 8;

 	wavHeader.blockAlign = (wavHeader.numChannels * wavHeader.bitsPerSample) / 8;

@@ -120,7 +117,7 @@

 	fwrite(&wavHeader, sizeof (wavHeader_t), 1, f);

 	fclose(f);

-	resetAudioDownsamplingStates();

+	clearMixerDownsamplerStates();

 	ui.mod2WavFinished = true;

 	ui.updateMod2WavDialog = true;

@@ -164,7 +161,7 @@

 	const int32_t lowestBPM = 32;

-	const int64_t maxSamplesToMix64 = audio.bpmTableMod2Wav[lowestBPM-32];

+	const int64_t maxSamplesToMix64 = audio.bpmTable[lowestBPM-32];

 	const int32_t maxSamplesToMix = ((TICKS_PER_RENDER_CHUNK * maxSamplesToMix64) + (1LL << 31)) >> 32; // ceil (rounded upwards)

 	mod2WavBuffer = (int16_t *)malloc(maxSamplesToMix * (2 * sizeof (int16_t)));

--- a/src/pt2_mod2wav.h

+++ b/src/pt2_mod2wav.h

@@ -2,6 +2,4 @@

 #include <stdbool.h>

-#define MOD2WAV_FREQ 96000

 bool renderToWav(char *fileName, bool checkIfFileExist);

--- a/src/pt2_pat2smp.c

+++ b/src/pt2_pat2smp.c

@@ -31,7 +31,7 @@

 		return;

-	editor.dPat2SmpBuf = (double *)malloc((MAX_SAMPLE_LEN*2) * sizeof (double));

+	editor.dPat2SmpBuf = (double *)malloc(MAX_SAMPLE_LEN * sizeof (double));

 	if (editor.dPat2SmpBuf == NULL)

 		statusOutOfMemory();

@@ -54,6 +54,8 @@

 	int64_t tickSampleCounter64 = 0;

+	clearMixerDownsamplerStates();

 	editor.smpRenderingDone = false;

 	while (!editor.smpRenderingDone && editor.songPlaying)

@@ -71,8 +73,7 @@

 	editor.isSMPRendering = false;

-	//const double dSamplesPerTick = audio.samplesPerTick64 / (UINT32_MAX+1.0);

+	clearMixerDownsamplerStates();

 	resetSong();

 	int32_t renderLength = editor.pat2SmpPos;

@@ -82,20 +83,13 @@

 	// set back old row

 	song->currRow = song->row = oldRow;

-	// downsample oversampled buffer, normalize and quantize to 8-bit

+	// normalize and quantize to 8-bit

-	downsample2xDouble(editor.dPat2SmpBuf, renderLength);

-	renderLength /= 2;

 	double dAmp = 1.0;

 	const double dPeak = getDoublePeak(editor.dPat2SmpBuf, renderLength);

 	if (dPeak > 0.0)

 		dAmp = INT8_MAX / dPeak;

-	double dVol = 64.0 * dPeak;

-	if (dVol > 64.0)

-		dVol = 64.0;

 	int8_t *smpPtr = &song->sampleData[s->offset];

 	for (int32_t i = 0; i < renderLength; i++)

@@ -122,7 +116,7 @@

 	s->length = (uint16_t)renderLength;

-	s->volume = (int8_t)round(dVol);

+	s->volume = 64;

 	s->loopStart = 0;

 	s->loopLength = 2;

--- a/src/pt2_pat2smp.h

+++ b/src/pt2_pat2smp.h

@@ -2,12 +2,10 @@

 #include "pt2_header.h"

-// we do 2x oversampling for BLEP synthesis to work right on all ProTracker pitches

 #define PAT2SMP_HI_PERIOD 124 /* A-3 finetune +4, 28603.99Hz */

 #define PAT2SMP_LO_PERIOD 170 /* E-3 finetune  0, 20864.08Hz */

-#define PAT2SMP_HI_FREQ (PAULA_PAL_CLK / (PAT2SMP_HI_PERIOD / 2.0))

-#define PAT2SMP_LO_FREQ (PAULA_PAL_CLK / (PAT2SMP_LO_PERIOD / 2.0))

+#define PAT2SMP_HI_FREQ ((double)PAULA_PAL_CLK / PAT2SMP_HI_PERIOD)

+#define PAT2SMP_LO_FREQ ((double)PAULA_PAL_CLK / PAT2SMP_LO_PERIOD)

 void doPat2Smp(void);

--- a/src/pt2_rcfilter.c

+++ b/src/pt2_rcfilter.c

@@ -1,73 +1,59 @@

-// 1-pole 6dB/oct RC filters, code by aciddose (I think?)

+/* 1-pole 6dB/oct RC filters, from:

+** https://www.musicdsp.org/en/latest/Filters/116-one-pole-lp-and-hp.html

+**

+** There's no frequency pre-warping with tan(), but doing that would

+** result in a cutoff that sounded slightly too low.

+*/

 #include <stdint.h>

-#include <math.h>

+#include "pt2_math.h"

 #include "pt2_rcfilter.h"

-void calcRCFilterCoeffs(double dSr, double dHz, rcFilter_t *f)

+void calcRCFilterCoeffs(double sr, double hz, rcFilter_t *f)

-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted

+	const double a = (hz < sr/2.0) ? pt2_cos((PT2_TWO_PI * hz) / sr) : 1.0;

+	const double b = 2.0 - a;

+	const double c = b - pt2_sqrt((b*b)-1.0);

-	const double c = (dHz < (dSr / 2.0)) ? tan((pi * dHz) / dSr) : 1.0;

-	f->c = c;

-	f->c2 = f->c * 2.0;

-	f->g = 1.0 / (1.0 + f->c);

-	f->cg = f->c * f->g;

+	f->c1 = 1.0 - c;

+	f->c2 = c;

 void clearRCFilterState(rcFilter_t *f)

-	f->buffer[0] = 0.0; // left channel

-	f->buffer[1] = 0.0; // right channel

+	f->tmp[0] = f->tmp[1] = 0.0;

-// aciddose: input 0 is resistor side of capacitor (low-pass), input 1 is reference side (high-pass)

-static inline double getLowpassOutput(rcFilter_t *f, const double input_0, const double input_1, const double buffer)

-{

-	double dOutput = DENORMAL_OFFSET;

-	dOutput += buffer * f->g + input_0 * f->cg + input_1 * (1.0 - f->cg);

-	return dOutput;

-}

 void RCLowPassFilterStereo(rcFilter_t *f, const double *in, double *out)

-	double output;

+	// left channel

+	f->tmp[0] = (f->c1*in[0] + f->c2*f->tmp[0]) + DENORMAL_OFFSET;

+	out[0] = f->tmp[0];

-	// left channel RC low-pass

-	output = getLowpassOutput(f, in[0], 0.0, f->buffer[0]);

-	f->buffer[0] += (in[0] - output) * f->c2;

-	out[0] = output;

-	// right channel RC low-pass

-	output = getLowpassOutput(f, in[1], 0.0, f->buffer[1]);

-	f->buffer[1] += (in[1] - output) * f->c2;

-	out[1] = output;

+	// right channel

+	f->tmp[1] = (f->c1*in[1] + f->c2*f->tmp[1]) + DENORMAL_OFFSET;

+	out[1] = f->tmp[1];

 void RCHighPassFilterStereo(rcFilter_t *f, const double *in, double *out)

-	double low[2];

+	// left channel

+	f->tmp[0] = (f->c1*in[0] + f->c2*f->tmp[0]) + DENORMAL_OFFSET;

+	out[0] = in[0]-f->tmp[0];

-	RCLowPassFilterStereo(f, in, low);

-	out[0] = in[0] - low[0]; // left channel high-pass

-	out[1] = in[1] - low[1]; // right channel high-pass

+	// right channel

+	f->tmp[1] = (f->c1*in[1] + f->c2*f->tmp[1]) + DENORMAL_OFFSET;

+	out[1] = in[1]-f->tmp[1];

 void RCLowPassFilter(rcFilter_t *f, const double in, double *out)

-	double output = getLowpassOutput(f, in, 0.0, f->buffer[0]);

-	f->buffer[0] += (in - output) * f->c2;

-	*out = output;

+	f->tmp[0] = (f->c1*in + f->c2*f->tmp[0]) + DENORMAL_OFFSET;

+	*out = f->tmp[0];

 void RCHighPassFilter(rcFilter_t *f, const double in, double *out)

-	double low;

-	RCLowPassFilter(f, in, &low);

-	*out = in - low; // high-pass

+	f->tmp[0] = (f->c1*in + f->c2*f->tmp[0]) + DENORMAL_OFFSET;

+	*out = in-f->tmp[0];

--- a/src/pt2_rcfilter.h

+++ b/src/pt2_rcfilter.h

@@ -3,19 +3,14 @@

 #include <stdint.h>

 #include <stdbool.h>

-// adding this prevents denormalized numbers, which is slow

-#define DENORMAL_OFFSET 1e-15

 typedef struct rcFilter_t

-	double buffer[2];

-	double c, c2, g, cg;

+	double tmp[2], c1, c2;

 } rcFilter_t;

-void calcRCFilterCoeffs(const double sr, const double hz, rcFilter_t *f);

+void calcRCFilterCoeffs(double sr, double hz, rcFilter_t *f);

 void clearRCFilterState(rcFilter_t *f);

 void RCLowPassFilterStereo(rcFilter_t *f, const double *in, double *out);

 void RCHighPassFilterStereo(rcFilter_t *f, const double *in, double *out);

 void RCLowPassFilter(rcFilter_t *f, const double in, double *out);

 void RCHighPassFilter(rcFilter_t *f, const double in, double *out);

--- a/src/pt2_replayer.c

+++ b/src/pt2_replayer.c

@@ -1206,9 +1206,7 @@

 	int64_t samplesPerTick64;

 	if (editor.isSMPRendering)

-		samplesPerTick64 = editor.pat2SmpHQ ? audio.bpmTable28kHz[bpm] : audio.bpmTable22kHz[bpm];

-	else if (editor.isWAVRendering)

-		samplesPerTick64 = audio.bpmTableMod2Wav[bpm];

+		samplesPerTick64 = editor.pat2SmpHQ ? audio.bpmTable28kHz[bpm] : audio.bpmTable20kHz[bpm];

 	else

 		samplesPerTick64 = audio.bpmTable[bpm];

@@ -1516,7 +1514,7 @@

 	editor.playMode = PLAY_MODE_NORMAL;

 	editor.blockMarkFlag = false;

-	audio.forceMixerOff = true;

+	audio.forceSoundCardSilence = true;

 	song->row = 0;

 	song->currRow = 0;

@@ -1550,9 +1548,9 @@

 	turnOffVoices();

-	memset((int8_t *)editor.vuMeterVolumes,0, sizeof (editor.vuMeterVolumes));

+	memset((int8_t *)editor.vuMeterVolumes,     0, sizeof (editor.vuMeterVolumes));

 	memset((int8_t *)editor.realVuMeterVolumes, 0, sizeof (editor.realVuMeterVolumes));

-	memset((int8_t *)editor.spectrumVolumes, 0, sizeof (editor.spectrumVolumes));

+	memset((int8_t *)editor.spectrumVolumes,    0, sizeof (editor.spectrumVolumes));

 	memset(song->channels, 0, sizeof (song->channels));

 	for (uint8_t i = 0; i < AMIGA_VOICES; i++)

@@ -1579,5 +1577,5 @@

 	song->tick = 0;

 	modRenderDone = false;

-	audio.forceMixerOff = false;

+	audio.forceSoundCardSilence = false;

--- a/src/pt2_sampling.c

+++ b/src/pt2_sampling.c

@@ -1,6 +1,10 @@

 /* Experimental audio sampling support.

 ** There may be several bad practices here, as I don't really

 ** have the proper knowledge on this stuff.

+**

+** Some functions like sin() may be different depending on

+** math library implementation, but we don't use pt2_math.c

+** replacements for speed reasons.

*/

 // for finding memory leaks in debug mode with Visual Studio

@@ -24,6 +28,7 @@

 #include "pt2_tables.h"

 #include "pt2_config.h"

 #include "pt2_sampling.h"

+#include "pt2_math.h" // PT2_PI

 enum

@@ -39,6 +44,7 @@

 #define SINC_TAPS 64

 #define SINC_TAPS_BITS 6 /* log2(SINC_TAPS) */

 #define SINC_PHASES 4096

+#define MID_TAP ((SINC_TAPS/2)*SINC_PHASES)

 #define SAMPLE_PREVIEW_WITDH 194

 #define SAMPLE_PREVIEW_HEIGHT 38

@@ -53,7 +59,7 @@

 static int32_t samplingMode = SAMPLE_MIX, inputFrequency, roundedOutputFrequency;

 static int32_t numAudioInputDevs, audioInputDevListOffset, selectedDev;

 static int32_t bytesSampled, maxSamplingLength, inputBufferSize;

-static double dOutputFrequency, *dSincTable, *dSamplingBuffer, *dSamplingBufferOrig;

+static double dOutputFrequency, *dSincTable, *dKaiserTable, *dSamplingBuffer, *dSamplingBufferOrig;

 static SDL_AudioDeviceID recordDev;

/*

@@ -65,6 +71,7 @@

 static double Izero(double y) // Compute Bessel function Izero(y) using a series approximation

 	double s = 1.0, ds = 1.0, d = 0.0;

+	const double epsilon = 1E-9; // 8bb: 1E-7 -> 1E-9 for added precision (still fast to calculate)

do

@@ -72,26 +79,66 @@

 		ds = ds * (y * y) / (d * d);

 		s = s + ds;

-	while (ds > 1E-7 * s);

+	while (ds > epsilon * s);

 	return s;

-static bool initSincTable(double cutoff)

+bool initKaiserTable(void) // called once on tracker init

-	if (cutoff > 0.999)

-		cutoff = 0.999;

+	dKaiserTable = (double *)malloc(SINC_TAPS * SINC_PHASES * sizeof (double)); // one window value per table index, same size as dSincTable

+	if (dKaiserTable == NULL)

+	{

+		showErrorMsgBox("Out of memory!");

+		return false;

+	}

+	const double beta = 9.6377; // window shape parameter passed to Izero()

+	const double izeroBeta = Izero(beta); // normalization divisor, computed once outside the loop

+	for (int32_t i = 0; i < SINC_TAPS*SINC_PHASES; i++)

+	{

+		double fkaiser;

+		int32_t ix = (SINC_TAPS-1) - (i & (SINC_TAPS-1)); // reversed tap index, taken from the low bits of i

+		ix = (ix * SINC_PHASES) + (i >> SINC_TAPS_BITS); // combine with the phase (upper bits of i) into one position

+		if (ix == MID_TAP) // centre of the window

+		{

+			fkaiser = 1.0;

+		}

+		else

+		{

+			const double x = (ix - MID_TAP) * (1.0 / SINC_PHASES); // signed distance from the centre, in taps

+			const double xMul = 1.0 / ((SINC_TAPS/2) * (SINC_TAPS/2)); // 1 / (half the tap count)^2

+			fkaiser = Izero(beta * sqrt(1.0 - x * x * xMul)) / izeroBeta; // Izero(beta*sqrt(1 - (x/(SINC_TAPS/2))^2)) / Izero(beta)

+		}

+		dKaiserTable[i] = fkaiser;

+	}

+	return true;

+}

+void freeKaiserTable(void) // releases the table allocated by initKaiserTable(); does nothing if it is already NULL

+{

+	if (dKaiserTable != NULL)

+	{

+		free(dKaiserTable);

+		dKaiserTable = NULL; // clear the pointer so a repeated call is a no-op

+	}

+}

+// calculated after completion of sampling (before downsampling)

+static bool initSincTable(double cutoff)

+{

 	dSincTable = (double *)malloc(SINC_TAPS * SINC_PHASES * sizeof (double));

 	if (dSincTable == NULL)

 		return false;

-	const double beta = 9.6377; // this value can maybe be tweaked (we do downsampling only)

-	const double izeroBeta = Izero(beta);

-	const double kPi = 4.0 * atan(1.0) * cutoff;

+	if (cutoff > 1.0)

+		cutoff = 1.0;

-#define MID_TAP ((SINC_TAPS/2)*SINC_PHASES)

+	const double kPi = PT2_PI * cutoff;

 	for (int32_t i = 0; i < SINC_TAPS*SINC_PHASES; i++)

 		double fsinc;

@@ -107,8 +154,7 @@

 			const double x = (ix - MID_TAP) * (1.0 / SINC_PHASES);

 			const double xPi = x * kPi;

-			const double xMul = 1.0 / ((SINC_TAPS/2) * (SINC_TAPS/2));

-			fsinc = sin(xPi) * Izero(beta * sqrt(1.0 - x * x * xMul)) / (izeroBeta * xPi); // Kaiser window

+			fsinc = (sin(xPi) / xPi) * dKaiserTable[i];

 		dSincTable[i] = fsinc * cutoff;

@@ -151,7 +197,7 @@

 		samplingNote = 35;

 	int32_t period = periodTable[((samplingFinetune & 0xF) * 37) + samplingNote];

-	if (period < 113) // this happens internally in our Paula mixer

+	if (period < 113) // also happens in our "set period" Paula function

 		period = 113;

 	dOutputFrequency = (double)PAULA_PAL_CLK / period;

@@ -167,7 +213,7 @@

 		if (len > SAMPLING_BUFFER_SIZE)

 			len = SAMPLING_BUFFER_SIZE;

-		const int16_t *L = (int16_t *)stream;

+		const int16_t *L =  (int16_t *)stream;

 		const int16_t *R = ((int16_t *)stream) + 1;

 		int16_t *dst16 = displayBuffer;

@@ -287,8 +333,6 @@

 		return;

 	listAudioDevices();

-	changeStatusText("PLEASE WAIT ...");

-	flipFrame();

 	stopInputAudio();

 	selectedDev = dev;

@@ -432,6 +476,9 @@

 void renderSamplingBox(void)

+	changeStatusText("PLEASE WAIT ...");

+	flipFrame();

 	editor.sampleZero = false;

 	editor.blockMarkFlag = false;

@@ -459,8 +506,8 @@

 	selectAudioDevice(selectedDev);

 	showCurrSample();

 	modStop();

 	editor.songPlaying = false;

 	editor.playMode = PLAY_MODE_NORMAL;

 	editor.currMode = MODE_IDLE;

@@ -557,9 +604,9 @@

 	assert(roundedOutputFrequency > 0);

-	maxSamplingLength = (int32_t)(ceil((65534.0*inputFrequency) / dOutputFrequency)) + 1;

+	maxSamplingLength = (int32_t)(ceil(((double)MAX_SAMPLE_LEN*inputFrequency) / dOutputFrequency)) + 1;

-	int32_t allocLen = (SINC_TAPS/2) + maxSamplingLength + (SINC_TAPS/2);

+	const int32_t allocLen = (SINC_TAPS/2) + maxSamplingLength + (SINC_TAPS/2);

 	dSamplingBufferOrig = (double *)malloc(allocLen * sizeof (double));

 	if (dSamplingBufferOrig == NULL)

@@ -568,7 +615,7 @@

 	dSamplingBuffer = dSamplingBufferOrig + (SINC_TAPS/2); // allow negative look-up for sinc taps

-	// clear tap area

+	// clear tap area before sample

 	memset(dSamplingBufferOrig, 0, (SINC_TAPS/2) * sizeof (double));

 	bytesSampled = 0;

@@ -583,7 +630,7 @@

 static int32_t downsampleSamplingBuffer(void)

-	// clear tap area

+	// clear tap area after sample

 	memset(&dSamplingBuffer[bytesSampled], 0, (SINC_TAPS/2) * sizeof (double));

 	const int32_t readLength = bytesSampled;

@@ -602,7 +649,6 @@

 	if (!initSincTable(dRatio))

-		free(dBuffer);

 		statusOutOfMemory();

 		return -1;

@@ -612,7 +658,7 @@

 	int8_t *output = &song->sampleData[song->samples[editor.currSample].offset];

 	const double dDelta = inputFrequency / dOutputFrequency;

-	// pre-centered (this is safe, look at how fSamplingBufferOrig is alloc'd)

+	// pre-centered (this is safe, look at how dSamplingBufferOrig is alloc'd)

 	const double *dSmpPtr = &dSamplingBuffer[-((SINC_TAPS/2)-1)];

 	double dPhase = 0.0;

@@ -622,12 +668,15 @@

 		double dSmp = sinc(dSmpPtr, dPhase);

 		dBuffer[i] = dSmp;

-		dSmp = fabs(dSmp);

+		// dSmp = fabs(dSmp)

+		if (dSmp < 0.0)

+			dSmp = -dSmp;

 		if (dSmp > dPeakAmp)

 			dPeakAmp = dSmp;

 		dPhase += dDelta;

-		const int32_t wholeSamples = (const int32_t)dPhase;

+		const int32_t wholeSamples = (int32_t)dPhase;

 		dPhase -= wholeSamples;

 		dSmpPtr += wholeSamples;

@@ -647,8 +696,13 @@

 	for (int32_t i = 0; i < writeLength; i++)

-		const int32_t smp32 = (const int32_t)round(dBuffer[i] * dAmp);

-		assert(smp32 >= -128 && smp32 <= 127); // shouldn't happen according to dAmp (but just in case)

+		double dSmp = dBuffer[i] * dAmp;

+		// faster than calling round()

+		     if (dSmp < 0.0) dSmp -= 0.5;

+		else if (dSmp > 0.0) dSmp += 0.5;

+		const int32_t smp32 = (int32_t)dSmp; // rounded

 		output[i] = (int8_t)smp32;

--- a/src/pt2_sampling.h

+++ b/src/pt2_sampling.h

@@ -3,6 +3,9 @@

 #include <stdint.h>

 #include <stdbool.h>

+bool initKaiserTable(void); // called once on tracker init

+void freeKaiserTable(void);

 void stopSampling(void);

 void freeAudioDeviceList(void);

 void renderSampleMonitor(void);

--- a/vs2019_project/pt2-clone/pt2-clone.vcxproj

+++ b/vs2019_project/pt2-clone/pt2-clone.vcxproj

@@ -97,7 +97,6 @@

       <BasicRuntimeChecks>Default</BasicRuntimeChecks>

       <FunctionLevelLinking>true</FunctionLevelLinking>

       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>

-      <FloatingPointModel>Fast</FloatingPointModel>

       <DebugInformationFormat>None</DebugInformationFormat>

       <OmitFramePointers>true</OmitFramePointers>

       <CompileAsWinRT>false</CompileAsWinRT>

@@ -143,7 +142,6 @@

       <BasicRuntimeChecks>Default</BasicRuntimeChecks>

       <FunctionLevelLinking>true</FunctionLevelLinking>

       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>

-      <FloatingPointModel>Fast</FloatingPointModel>

       <DebugInformationFormat>None</DebugInformationFormat>

       <OmitFramePointers>true</OmitFramePointers>

       <BufferSecurityCheck>false</BufferSecurityCheck>

@@ -178,7 +176,6 @@

     <ClCompile>

       <WarningLevel>Level4</WarningLevel>

       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;DEBUG;_DEBUG;WIN32;_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>

-      <FloatingPointModel>Fast</FloatingPointModel>

       <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>

       <TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>

       <MultiProcessorCompilation>true</MultiProcessorCompilation>

@@ -207,7 +204,6 @@

     <ClCompile>

       <WarningLevel>Level4</WarningLevel>

       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;DEBUG;_DEBUG;WIN32;_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>

-      <FloatingPointModel>Fast</FloatingPointModel>

       <OmitFramePointers>false</OmitFramePointers>

       <TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>

       <MultiProcessorCompilation>true</MultiProcessorCompilation>

@@ -252,6 +248,7 @@

     <ClInclude Include="..\..\src\pt2_helpers.h" />

     <ClInclude Include="..\..\src\pt2_keyboard.h" />

     <ClInclude Include="..\..\src\pt2_ledfilter.h" />

+    <ClInclude Include="..\..\src\pt2_math.h" />

     <ClInclude Include="..\..\src\pt2_mod2wav.h" />

     <ClInclude Include="..\..\src\pt2_module_loader.h" />

     <ClInclude Include="..\..\src\pt2_module_saver.h" />

@@ -305,6 +302,7 @@

     <ClCompile Include="..\..\src\pt2_keyboard.c" />

     <ClCompile Include="..\..\src\pt2_ledfilter.c" />

     <ClCompile Include="..\..\src\pt2_main.c" />

+    <ClCompile Include="..\..\src\pt2_math.c" />

     <ClCompile Include="..\..\src\pt2_mod2wav.c" />

     <ClCompile Include="..\..\src\pt2_module_loader.c" />

     <ClCompile Include="..\..\src\pt2_rcfilter.c" />

--- a/vs2019_project/pt2-clone/pt2-clone.vcxproj.filters

+++ b/vs2019_project/pt2-clone/pt2-clone.vcxproj.filters

@@ -102,6 +102,9 @@

     <ClInclude Include="..\..\src\pt2_downsamplers2x.h">

       <Filter>headers</Filter>

     </ClInclude>

+    <ClInclude Include="..\..\src\pt2_math.h">

+      <Filter>headers</Filter>

+    </ClInclude>

   </ItemGroup>

   <ItemGroup>

     <ClCompile Include="..\..\src\pt2_audio.c" />

@@ -190,6 +193,7 @@

     <ClCompile Include="..\..\src\pt2_ledfilter.c" />

     <ClCompile Include="..\..\src\pt2_chordmaker.c" />

     <ClCompile Include="..\..\src\pt2_downsample2x.c" />

+    <ClCompile Include="..\..\src\pt2_math.c" />

   </ItemGroup>

   <ItemGroup>

     <ResourceCompile Include="..\..\src\pt2-clone.rc" />

--

⑨