shithub: pt2-clone

Download patch

ref: b8e8e8a8ef726fbe428edc74bdd9de886863075c
parent: 67803a8fc77c8b570476cdcb749cfb44da9c64d8
author: Olav Sørensen <olav.sorensen@live.no>
date: Sat Sep 4 14:27:50 EDT 2021

Push v1.33 code

--- a/src/pt2_audio.c
+++ b/src/pt2_audio.c
@@ -15,11 +15,11 @@
 #else
 #include <unistd.h>
 #endif
-#include <math.h> // sqrt(),tan()
 #include <fcntl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <limits.h>
+#include "pt2_math.h"
 #include "pt2_audio.h"
 #include "pt2_header.h"
 #include "pt2_helpers.h"
@@ -37,22 +37,29 @@
 #include "pt2_ledfilter.h"
 #include "pt2_downsamplers2x.h"
 
+#define STEREO_NORM_FACTOR 0.5 /* cumulative mid/side normalization factor (1/sqrt(2))*(1/sqrt(2)) */
+
 #define INITIAL_DITHER_SEED 0x12345000
 
 static volatile bool ledFilterEnabled;
 static volatile uint8_t filterModel;
-static int8_t defStereoSep;
 static bool amigaPanFlag;
-static int32_t randSeed = INITIAL_DITHER_SEED;
+static int32_t randSeed = INITIAL_DITHER_SEED, stereoSeparation = 100;
 static uint32_t audLatencyPerfValInt, audLatencyPerfValFrac;
 static uint64_t tickTime64, tickTime64Frac;
 static double *dMixBufferL, *dMixBufferR, *dMixBufferLUnaligned, *dMixBufferRUnaligned;
-static double dPrngStateL, dPrngStateR, dLState[2], dRState[2];
+static double dPrngStateL, dPrngStateR, dSideFactor;
 static blep_t blep[AMIGA_VOICES], blepVol[AMIGA_VOICES];
-static rcFilter_t filterLoA500, filterHiA500, filterHiA1200;
+static rcFilter_t filterLoA500, filterHiA500, filterLoA1200, filterHiA1200;
 static ledFilter_t filterLED;
 static SDL_AudioDeviceID dev;
 
+static void processFiltersA1200_NoLED(int32_t numSamples);
+static void processFiltersA1200_LED(int32_t numSamples);
+static void processFiltersA500_NoLED(int32_t numSamples);
+static void processFiltersA500_LED(int32_t numSamples);
+static void (*processFiltersFunc)(int32_t);
+
 // for audio/video syncing
 static uint32_t tickTimeLen, tickTimeLenFrac;
 
@@ -62,6 +69,24 @@
 
 bool intMusic(void); // defined in pt_modplayer.c
 
+static void updateFilterFunc(void) // points processFiltersFunc at the routine matching filterModel and ledFilterEnabled
+{
+	if (filterModel == FILTERMODEL_A500)
+	{
+		if (ledFilterEnabled)
+			processFiltersFunc = processFiltersA500_LED;
+		else
+			processFiltersFunc = processFiltersA500_NoLED;
+	}
+	else // A1200 (any filterModel value other than FILTERMODEL_A500 ends up here)
+	{
+		if (ledFilterEnabled)
+			processFiltersFunc = processFiltersA1200_LED;
+		else
+			processFiltersFunc = processFiltersA1200_NoLED;
+	}
+}
+
 void setLEDFilter(bool state, bool doLockAudio)
 {
 	const bool audioWasntLocked = !audio.locked;
@@ -72,6 +97,7 @@
 
 	editor.useLEDFilter = state;
 	ledFilterEnabled = editor.useLEDFilter;
+	updateFilterFunc();
 
 	if (doLockAudio && audioWasntLocked)
 		unlockAudio();
@@ -87,6 +113,7 @@
 
 	editor.useLEDFilter ^= 1;
 	ledFilterEnabled = editor.useLEDFilter;
+	updateFilterFunc();
 
 	if (audioWasntLocked)
 		unlockAudio();
@@ -160,10 +187,6 @@
 	if (audioWasntLocked)
 		lockAudio();
 
-	// copy old pans
-	const double dOldPanL = paula[ch].dPanL;
-	const double dOldPanR = paula[ch].dPanR;
-
 	memset(&paula[ch], 0, sizeof (paulaVoice_t));
 	memset(&blep[ch], 0, sizeof (blep_t));
 	memset(&blepVol[ch], 0, sizeof (blep_t));
@@ -171,10 +194,6 @@
 	stopScope(ch); // it should be safe to clear the scope now
 	memset(&scope[ch], 0, sizeof (scope_t));
 
-	// restore old pans
-	paula[ch].dPanL = dOldPanL;
-	paula[ch].dPanR = dOldPanR;
-
 	if (audioWasntLocked)
 		unlockAudio();
 }
@@ -189,6 +208,7 @@
 		mixerKillVoice(i);
 
 	clearRCFilterState(&filterLoA500);
+	clearRCFilterState(&filterLoA1200);
 	clearRCFilterState(&filterHiA500);
 	clearRCFilterState(&filterHiA1200);
 	clearLEDFilterState(&filterLED);
@@ -242,15 +262,13 @@
 
 		// this period is not cached, calculate mixer deltas
 
-		// during PAT2SMP or doing MOD2WAV, use different audio output rates
+		// during PAT2SMP, use different audio output rates
 		if (editor.isSMPRendering)
 			dPeriodToDeltaDiv = editor.pat2SmpHQ ? (PAULA_PAL_CLK / PAT2SMP_HI_FREQ) : (PAULA_PAL_CLK / PAT2SMP_LO_FREQ);
-		else if (editor.isWAVRendering)
-			dPeriodToDeltaDiv = PAULA_PAL_CLK / (double)MOD2WAV_FREQ;
 		else
 			dPeriodToDeltaDiv = audio.dPeriodToDeltaDiv;
 
-		v->dOldVoiceDelta = dPeriodToDeltaDiv / realPeriod;
+		v->dOldVoiceDelta = (dPeriodToDeltaDiv / realPeriod) * 0.5; // /2 since we do 2x oversampling
 
 		// for BLEP synthesis (prevents division in inner mix loop)
 		v->dOldVoiceDeltaMul = 1.0 / v->dOldVoiceDelta;
@@ -386,11 +404,14 @@
 		lockAudio();
 
 	clearRCFilterState(&filterLoA500);
+	clearRCFilterState(&filterLoA1200);
 	clearRCFilterState(&filterHiA500);
 	clearRCFilterState(&filterHiA1200);
 	clearLEDFilterState(&filterLED);
 
 	filterModel ^= 1;
+	updateFilterFunc();
+
 	if (filterModel == FILTERMODEL_A500)
 		displayMsg("AUDIO: AMIGA 500");
 	else
@@ -402,6 +423,7 @@
 
 void mixChannels(int32_t numSamples)
 {
+	double *dMixBufSelect[AMIGA_VOICES] = { dMixBufferL, dMixBufferR, dMixBufferR, dMixBufferL };
 	double dSmp, dVol;
 	blep_t *bSmp, *bVol;
 	paulaVoice_t *v;
@@ -418,6 +440,7 @@
 		if (!v->active || v->data == NULL)
 			continue;
 
+		double *dMixBuf = dMixBufSelect[i];
 		for (int32_t j = 0; j < numSamples; j++)
 		{
 			assert(v->data != NULL);
@@ -444,11 +467,8 @@
 			if (bSmp->samplesLeft > 0) dSmp = blepRun(bSmp, dSmp);
 			if (bVol->samplesLeft > 0) dVol = blepRun(bVol, dVol);
 
-			dSmp *= dVol;
+			dMixBuf[j] += dSmp * dVol;
 
-			dMixBufferL[j] += dSmp * v->dPanL;
-			dMixBufferR[j] += dSmp * v->dPanR;
-
 			v->dPhase += v->dDelta;
 			if (v->dPhase >= 1.0) // deltas can't be >= 1.0, so this is safe
 			{
@@ -478,12 +498,6 @@
 	dPrngStateR = 0.0;
 }
 
-void resetAudioDownsamplingStates(void)
-{
-	dLState[0] = dLState[1] = 0.0;
-	dRState[0] = dRState[1] = 0.0;
-}
-
 static inline int32_t random32(void)
 {
 	// LCG random 32-bit generator (quite good and fast)
@@ -492,94 +506,195 @@
 	return randSeed;
 }
 
-static void processMixedSamples(int32_t i, int16_t *out)
+static void processFiltersA1200_NoLED(int32_t numSamples)
 {
-	int32_t smp32;
-	double dPrng, dOut[2], dMixL[2], dMixR[2];
+	// apply filters
+	for (int32_t i = 0; i < numSamples; i++)
+	{
+		double dOut[2];
 
-	// we run the filters at 2x the audio output rate for more precision
-	for (int32_t j = 0; j < 2; j++)
+		dOut[0] = dMixBufferL[i];
+		dOut[1] = dMixBufferR[i];
+
+		// low-pass filter
+		RCLowPassFilterStereo(&filterLoA1200, dOut, dOut);
+
+		// high-pass RC filter
+		RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
+
+		dMixBufferL[i] = dOut[0];
+		dMixBufferR[i] = dOut[1];
+	}
+}
+
+static void processFiltersA1200_LED(int32_t numSamples)
+{
+	// apply filters
+	for (int32_t i = 0; i < numSamples; i++)
 	{
-		// zero-padding (yes, this makes sense)
-		dOut[0] = (j == 0) ? dMixBufferL[i] : 0.0;
-		dOut[1] = (j == 0) ? dMixBufferR[i] : 0.0;
+		double dOut[2];
 
-		if (filterModel == FILTERMODEL_A500)
-		{
-			// A500 low-pass RC filter
-			RCLowPassFilterStereo(&filterLoA500, dOut, dOut);
+		dOut[0] = dMixBufferL[i];
+		dOut[1] = dMixBufferR[i];
 
-			// "LED" Sallen-Key filter
-			if (ledFilterEnabled)
-				LEDFilter(&filterLED, dOut, dOut);
+		// low-pass filter
+		RCLowPassFilterStereo(&filterLoA1200, dOut, dOut);
 
-			// A500 high-pass RC filter
-			RCHighPassFilterStereo(&filterHiA500, dOut, dOut);
-		}
-		else
-		{
-			// A1200 low-pass filter is ignored (we don't want it)
+		// "LED" Sallen-Key filter
+		LEDFilter(&filterLED, dOut, dOut);
 
-			// "LED" Sallen-Key filter
-			if (ledFilterEnabled)
-				LEDFilter(&filterLED, dOut, dOut);
+		// high-pass RC filter
+		RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
 
-			// A1200 high-pass RC filter
-			RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
-		}
+		dMixBufferL[i] = dOut[0];
+		dMixBufferR[i] = dOut[1];
+	}
+}
 
-		dMixL[j] = dOut[0];
-		dMixR[j] = dOut[1];
+static void processFiltersA500_NoLED(int32_t numSamples)
+{
+	for (int32_t i = 0; i < numSamples; i++)
+	{
+		double dOut[2];
+
+		dOut[0] = dMixBufferL[i];
+		dOut[1] = dMixBufferR[i];
+
+		// low-pass RC filter
+		RCLowPassFilterStereo(&filterLoA500, dOut, dOut);
+
+		// high-pass RC filter
+		RCHighPassFilterStereo(&filterHiA500, dOut, dOut);
+
+		dMixBufferL[i] = dOut[0];
+		dMixBufferR[i] = dOut[1];
 	}
+}
 
-#define NORMALIZE_DOWNSAMPLE 2.0
+static void processFiltersA500_LED(int32_t numSamples)
+{
+	for (int32_t i = 0; i < numSamples; i++)
+	{
+		double dOut[2];
 
-	// 2x "all-pass halfband" downsampling
-	dOut[0] = d2x(dMixL, dLState);
-	dOut[1] = d2x(dMixR, dRState);
+		dOut[0] = dMixBufferL[i];
+		dOut[1] = dMixBufferR[i];
 
-	// normalize and invert phase (A500/A1200 has a phase-inverted audio signal)
-	dOut[0] *= NORMALIZE_DOWNSAMPLE * (-INT16_MAX / (double)AMIGA_VOICES);
-	dOut[1] *= NORMALIZE_DOWNSAMPLE * (-INT16_MAX / (double)AMIGA_VOICES);
+		// low-pass RC filter
+		RCLowPassFilterStereo(&filterLoA500, dOut, dOut);
 
+		// "LED" Sallen-Key filter
+		LEDFilter(&filterLED, dOut, dOut);
+
+		// high-pass RC filter
+		RCHighPassFilterStereo(&filterHiA500, dOut, dOut);
+
+		dMixBufferL[i] = dOut[0];
+		dMixBufferR[i] = dOut[1];
+	}
+}
+
+#define NORM_FACTOR 2.0 /* nominally correct, but can clip from high-pass filter overshoot */
+static inline void processMixedSamplesAmigaPanning(int32_t i, int16_t *out)
+{
+	int32_t smp32;
+	double dPrng, dL, dR;
+
+	// 2x downsampling (decimation)
+	const uint32_t offset1 = (i << 1) + 0;
+	const uint32_t offset2 = (i << 1) + 1;
+	dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);
+	dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);
+
+	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)
+	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
+	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
+
 	// left channel - 1-bit triangular dithering (high-pass filtered)
 	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
-	dOut[0] = (dOut[0] + dPrng) - dPrngStateL;
+	dL = (dL + dPrng) - dPrngStateL;
 	dPrngStateL = dPrng;
-	smp32 = (int32_t)dOut[0];
+	smp32 = (int32_t)dL;
 	CLAMP16(smp32);
 	out[0] = (int16_t)smp32;
 
 	// right channel - 1-bit triangular dithering (high-pass filtered)
 	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
-	dOut[1] = (dOut[1] + dPrng) - dPrngStateR;
+	dR = (dR + dPrng) - dPrngStateR;
 	dPrngStateR = dPrng;
-	smp32 = (int32_t)dOut[1];
+	smp32 = (int32_t)dR;
 	CLAMP16(smp32);
 	out[1] = (int16_t)smp32;
 }
 
-void outputAudio(int16_t *target, int32_t numSamples)
+static inline void processMixedSamples(int32_t i, int16_t *out)
 {
-	int16_t out[2];
-	int32_t i;
+	int32_t smp32;
+	double dPrng, dL, dR;
 
+	// 2x downsampling (decimation)
+	const uint32_t offset1 = (i << 1) + 0;
+	const uint32_t offset2 = (i << 1) + 1;
+	dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);
+	dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);
+
+	// apply stereo separation
+	const double dOldL = dL;
+	const double dOldR = dR;
+	double dMid  = (dOldL + dOldR) * STEREO_NORM_FACTOR;
+	double dSide = (dOldL - dOldR) * dSideFactor;
+	dL = dMid + dSide;
+	dR = dMid - dSide;
+
+	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)
+	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
+	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
+
+	// left channel - 1-bit triangular dithering (high-pass filtered)
+	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
+	dL = (dL + dPrng) - dPrngStateL;
+	dPrngStateL = dPrng;
+	smp32 = (int32_t)dL;
+	CLAMP16(smp32);
+	out[0] = (int16_t)smp32;
+
+	// right channel - 1-bit triangular dithering (high-pass filtered)
+	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
+	dR = (dR + dPrng) - dPrngStateR;
+	dPrngStateR = dPrng;
+	smp32 = (int32_t)dR;
+	CLAMP16(smp32);
+	out[1] = (int16_t)smp32;
+}
+
+void outputAudio(int16_t *target, int32_t numSamples)
+{
 	if (editor.isSMPRendering)
 	{
 		// render to sample (PAT2SMP)
 
 		int32_t samplesTodo = numSamples;
-		if (editor.pat2SmpPos+samplesTodo > MAX_SAMPLE_LEN*2)
-			samplesTodo = (MAX_SAMPLE_LEN*2)-editor.pat2SmpPos;
+		if (editor.pat2SmpPos+samplesTodo > MAX_SAMPLE_LEN)
+			samplesTodo = MAX_SAMPLE_LEN-editor.pat2SmpPos;
 
-		mixChannels(samplesTodo);
+		// mix channels (at 2x rate, we do 2x oversampling)
+		mixChannels(samplesTodo*2);
 
 		double *dOutStream = &editor.dPat2SmpBuf[editor.pat2SmpPos];
-		for (i = 0; i < samplesTodo; i++)
-			dOutStream[i] = dMixBufferL[i] + dMixBufferR[i]; // normalized to -128..127 later
+		for (int32_t i = 0; i < samplesTodo; i++)
+		{
+			// 2x downsampling (decimation)
+			double dL, dR;
+			const uint32_t offset1 = (i << 1) + 0;
+			const uint32_t offset2 = (i << 1) + 1;
+			dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);
+			dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);
 
+			dOutStream[i] = (dL + dR) * 0.5; // normalized to -128..127 later
+		}
+
 		editor.pat2SmpPos += samplesTodo;
-		if (editor.pat2SmpPos >= MAX_SAMPLE_LEN*2)
+		if (editor.pat2SmpPos >= MAX_SAMPLE_LEN)
 		{
 			editor.smpRenderingDone = true;
 			updateWindowTitle(MOD_IS_MODIFIED);
@@ -587,18 +702,33 @@
 	}
 	else
 	{
-		// render to stream
+		// mix and filter channels (at 2x rate, we do 2x oversampling)
+		mixChannels(numSamples*2);
+		processFiltersFunc(numSamples*2);
 
-		mixChannels(numSamples);
-
+		// downsample, normalize and dither
+		int16_t out[2];
 		int16_t *outStream = target;
-		for (i = 0; i < numSamples; i++)
+		if (stereoSeparation == 100)
 		{
-			processMixedSamples(i, out);
+			for (int32_t i = 0; i < numSamples; i++)
+			{
+				processMixedSamplesAmigaPanning(i, out); // also does 2x downsampling
 
-			*outStream++ = out[0];
-			*outStream++ = out[1];
+				*outStream++ = out[0];
+				*outStream++ = out[1];
+			}
 		}
+		else
+		{
+			for (int32_t i = 0; i < numSamples; i++)
+			{
+				processMixedSamples(i, out); // also does 2x downsampling
+
+				*outStream++ = out[0];
+				*outStream++ = out[1];
+			}
+		}
 	}
 }
 
@@ -648,7 +778,7 @@
 
 static void SDLCALL audioCallback(void *userdata, Uint8 *stream, int len)
 {
-	if (audio.forceMixerOff) // during MOD2WAV
+	if (audio.forceSoundCardSilence) // during MOD2WAV
 	{
 		memset(stream, 0, len);
 		return;
@@ -732,127 +862,61 @@
 	** that didn't change before production (or changes that never reached production).
 	** This has been confirmed by measuring the components on several Amiga motherboards.
 	**
-	** Correct values for A500 (A500_R6.pdf):
-	** - RC 6dB/oct low-pass: R=360 ohm, C=0.1uF (f=4420.970Hz)
-	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF (f=3090.532Hz)
-	** - RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22.33uF (22+0.33) (f=5.127Hz)
+	** Correct values for A500, >rev3 (?) (A500_R6.pdf):
+	** - 1-pole RC 6dB/oct low-pass: R=360 ohm, C=0.1uF
+	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF
+	** - 1-pole RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22.33uF (22+0.33)
 	**
-	** Correct values for A1200 (A1200_R2.pdf):
-	** - RC 6dB/oct low-pass: R=680 ohm, C=6800pF (f=34419.321Hz)
-	** - Sallen-key low-pass ("LED"): Same as A500 (f=3090.532Hz)
-	** - RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22uF (f=5.204Hz)
+	** Correct values for A1200, all revs (A1200_R2.pdf):
+	** - 1-pole RC 6dB/oct low-pass: R=680 ohm, C=6800pF
+	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF (same as A500)
+	** - 1-pole RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22uF
 	*/
-
-	// we run the filters at twice the frequency for improved precision (zero-padding)
-	const uint32_t audioFreq = audio.outputRate * 2;
-
+	const double dAudioFreq = audio.outputRate * 2.0; // *2 because we do 2x oversampling
 	double R, C, R1, R2, C1, C2, fc, fb;
-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted
 
-	/*
-	** 8bitbubsy:
-	** Hackish low-pass cutoff compensation to better match Amiga 500 when
-	** we use "lower" audio output rates. This has been loosely hand-picked
-	** after looking at many frequency analyses on a sine-sweep test module
-	** rendered on 7 different Amiga 500 machines (and taking the average).
-	** Don't try to make sense of this magic constant, and it should only be
-	** used within this very specific application!
-	**
-	** The reason we want this bias is because our digital RC filter is not
-	** that precise at lower audio output rates. It would otherwise lead to a
-	** slight unwanted cut of treble near the cutoff we aim for. It was easily
-	** audible, and especially visible on a plotted frequency spectrum.
-	**
-	** 1100Hz is the magic value I found that seems to be good. Higher than that
-	** would allow too much treble to pass.
-	**
-	** Scaling it like this is 'acceptable' (confirmed with further frequency analyses
-	** at output rates of 48, 96 and 192).
-	*/
-	double dLPCutoffBias = 1100.0 * (44100.0 / audio.outputRate);
-
 	// A500 1-pole (6db/oct) static RC low-pass filter:
-	R = 360.0; // R321 (360 ohm resistor)
-	C = 1e-7;  // C321 (0.1uF capacitor)
-	fc = (1.0 / (2.0 * pi * R * C)) + dLPCutoffBias;
-	calcRCFilterCoeffs(audioFreq, fc, &filterLoA500);
-	
-	/*
-	** 8bitbubsy:
-	** We don't handle Amiga 1200's ~34kHz low-pass filter as it's not really
-	** needed. The reason it was still present in the A1200 (despite its high
-	** non-audible cutoff) was to filter away high-frequency noise from Paula's
-	** PWM (volume modulation). We don't do PWM for volume in the PT2 clone.
-	*/
+	R = 360.0; // R321 (360 ohm)
+	C = 1e-7;  // C321 (0.1uF)
+	fc = (1.0 / (PT2_TWO_PI * R * C)); // cutoff = ~4420.97Hz
+	calcRCFilterCoeffs(dAudioFreq, fc, &filterLoA500);
 
-	// Sallen-Key filter ("LED" filter, same RC values on A500 and A1200):
-	R1 = 10000.0; // R322 (10K ohm resistor)
-	R2 = 10000.0; // R323 (10K ohm resistor)
-	C1 = 6.8e-9;  // C322 (6800pF capacitor)
-	C2 = 3.9e-9;  // C323 (3900pF capacitor)
-	fc = 1.0 / (2.0 * pi * sqrt(R1 * R2 * C1 * C2));
-	fb = 0.125; // Fb = 0.125 : Q ~= 1/sqrt(2)
-	calcLEDFilterCoeffs(audioFreq, fc, fb, &filterLED);
+	// A1200 1-pole (6db/oct) static RC low-pass filter:
+	R = 680.0;  // R321 (680 ohm)
+	C = 6.8e-9; // C321 (6800pF)
+	fc = (1.0 / (PT2_TWO_PI * R * C)); // cutoff = ~34419.32Hz
+	calcRCFilterCoeffs(dAudioFreq, fc, &filterLoA1200);
 
+	// Sallen-Key filter ("LED" filter, same values on A500/A1200):
+	R1 = 10000.0; // R322 (10K ohm)
+	R2 = 10000.0; // R323 (10K ohm)
+	C1 = 6.8e-9;  // C322 (6800pF)
+	C2 = 3.9e-9;  // C323 (3900pF)
+	fc = 1.0 / (PT2_TWO_PI * pt2_sqrt(R1 * R2 * C1 * C2)); // cutoff = ~3090.53Hz
+	fb = 0.125/2.0; // Fb = 0.125 : Q ~= 1/sqrt(2) (Butterworth) (8bb: was 0.125, but /2 gives a closer gain!)
+	calcLEDFilterCoeffs(dAudioFreq, fc, fb, &filterLED);
+
 	// A500 1-pole (6dB/oct) static RC high-pass filter:
-	R = 1390.0; // R324 (1K ohm resistor) + R325 (390 ohm resistor)
-	C = 2.233e-5; // C334 (22uF capacitor) + C335 (0.33�F capacitor)
-	fc = 1.0 / (2.0 * pi * R * C);
-	calcRCFilterCoeffs(audioFreq, fc, &filterHiA500);
+	R = 1390.0;   // R324 (1K ohm) + R325 (390 ohm)
+	C = 2.233e-5; // C334 (22uF) + C335 (0.33uF)
+	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~5.13Hz
+	calcRCFilterCoeffs(dAudioFreq, fc, &filterHiA500);
 
 	// A1200 1-pole (6dB/oct) static RC high-pass filter:
 	R = 1390.0; // R324 (1K ohm resistor) + R325 (390 ohm resistor)
 	C = 2.2e-5; // C334 (22uF capacitor)
-	fc = 1.0 / (2.0 * pi * R * C);
-	calcRCFilterCoeffs(audioFreq, fc, &filterHiA1200);
+	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~5.20Hz
+	calcRCFilterCoeffs(dAudioFreq, fc, &filterHiA1200);
 }
 
-void recalcFilterCoeffs(int32_t outputRate) // for MOD2WAV
+void mixerSetStereoSeparation(uint8_t percentage) // 0..100 (percentage)
 {
-	const bool audioWasntLocked = !audio.locked;
-	if (audioWasntLocked)
-		lockAudio();
+	assert(percentage <= 100);
 
-	const int32_t oldOutputRate = audio.outputRate;
-	audio.outputRate = outputRate;
-
-	clearRCFilterState(&filterLoA500);
-	clearRCFilterState(&filterHiA500);
-	clearRCFilterState(&filterHiA1200);
-	clearLEDFilterState(&filterLED);
-
-	calculateFilterCoeffs();
-
-	audio.outputRate = oldOutputRate;
-	if (audioWasntLocked)
-		unlockAudio();
+	stereoSeparation = percentage;
+	dSideFactor = (percentage / 100.0) * STEREO_NORM_FACTOR;
 }
 
-static void setVoicePan(int32_t ch, double pan) // pan = 0.0 .. 1.0
-{
-	// constant power panning
-
-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted
-
-	paula[ch].dPanL = cos(pan * pi * 0.5) * sqrt(2.0);
-	paula[ch].dPanR = sin(pan * pi * 0.5) * sqrt(2.0);
-}
-
-void mixerCalcVoicePans(uint8_t stereoSeparation) // 0..100 (percentage)
-{
-	assert(stereoSeparation <= 100);
-
-	const double panMid = 0.5;
-
-	const double panR = panMid + (stereoSeparation / (100.0 * 2.0));
-	const double panL = 1.0 - panR;
-
-	setVoicePan(0, panL);
-	setVoicePan(1, panR);
-	setVoicePan(2, panR);
-	setVoicePan(3, panL);
-}
-
 static double ciaBpm2Hz(int32_t bpm)
 {
 	if (bpm == 0)
@@ -873,17 +937,15 @@
 		else
 			dBpmHz = ciaBpm2Hz(bpm);
 
-		const double dSamplesPerTick = audio.outputRate / dBpmHz;
-		const double dSamplesPerTick28kHz = PAT2SMP_HI_FREQ / dBpmHz; // PAT2SMP hi quality
-		const double dSamplesPerTick22kHz = PAT2SMP_LO_FREQ / dBpmHz; // PAT2SMP low quality
-		const double dSamplesPerTickMod2Wav = MOD2WAV_FREQ / dBpmHz; // MOD2WAV
+		const double dSamplesPerTick      = audio.outputRate / dBpmHz;
+		const double dSamplesPerTick28kHz = PAT2SMP_HI_FREQ  / dBpmHz; // PAT2SMP hi quality
+		const double dSamplesPerTick20kHz = PAT2SMP_LO_FREQ  / dBpmHz; // PAT2SMP low quality
 
 		// convert to rounded 32.32 fixed-point
-		const int32_t i = bpm-32;
-		audio.bpmTable[i] = (int64_t)((dSamplesPerTick * (UINT32_MAX+1.0)) + 0.5);
+		const int32_t i = bpm - 32;
+		audio.bpmTable[i]      = (int64_t)((dSamplesPerTick      * (UINT32_MAX+1.0)) + 0.5);
 		audio.bpmTable28kHz[i] = (int64_t)((dSamplesPerTick28kHz * (UINT32_MAX+1.0)) + 0.5);
-		audio.bpmTable22kHz[i] = (int64_t)((dSamplesPerTick22kHz * (UINT32_MAX+1.0)) + 0.5);
-		audio.bpmTableMod2Wav[i] = (int64_t)((dSamplesPerTickMod2Wav * (UINT32_MAX+1.0)) + 0.5);
+		audio.bpmTable20kHz[i] = (int64_t)((dSamplesPerTick20kHz * (UINT32_MAX+1.0)) + 0.5);
 	}
 }
 
@@ -941,9 +1003,11 @@
 		return false;
 	}
 
-	if (have.freq < 32000) // lower than this is not safe for the BLEP synthesis in the mixer
+	// lower than this is not safe for the BLEP synthesis in the mixer
+	const int32_t minFreq = (int32_t)(PAULA_PAL_CLK / 113.0 / 2.0)+1; // /2 because we do 2x oversampling
+	if (have.freq < minFreq)
 	{
-		showErrorMsgBox("Unable to open audio: An audio rate below 32kHz can't be used!");
+		showErrorMsgBox("Unable to open audio: An audio rate below %dHz can't be used!", minFreq);
 		return false;
 	}
 
@@ -960,15 +1024,13 @@
 	updateReplayerTimingMode();
 
 	const int32_t lowestBPM = 32;
-	const int32_t pat2SmpMaxSamples = (audio.bpmTable22kHz[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)
-	const int32_t mod2WavMaxSamples = (audio.bpmTableMod2Wav[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)
+	const int32_t pat2SmpMaxSamples = (audio.bpmTable20kHz[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)
 	const int32_t renderMaxSamples = (audio.bpmTable[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)
+	const int32_t maxSamplesToMix = MAX(pat2SmpMaxSamples, renderMaxSamples) * 2; // *2: outputAudio() calls mixChannels(numSamples*2) (2x oversampling), so the mix buffers are written at twice the output sample count (NOTE(review): likely the "buggy code" this headroom was added for - confirm)
 
-	const int32_t maxSamplesToMix = MAX(pat2SmpMaxSamples, MAX(mod2WavMaxSamples, renderMaxSamples));
+	dMixBufferLUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double), 256);
+	dMixBufferRUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double), 256);
 
-	dMixBufferLUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double) * 8, 256);
-	dMixBufferRUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double) * 8, 256);
-
 	if (dMixBufferLUnaligned == NULL || dMixBufferRUnaligned == NULL)
 	{
 		showErrorMsgBox("Out of memory!");
@@ -978,8 +1040,7 @@
 	dMixBufferL = (double *)ALIGN_PTR(dMixBufferLUnaligned, 256);
 	dMixBufferR = (double *)ALIGN_PTR(dMixBufferRUnaligned, 256);
 
-	mixerCalcVoicePans(config.stereoSeparation);
-	defStereoSep = config.stereoSeparation;
+	mixerSetStereoSeparation(config.stereoSeparation);
 
 	filterModel = config.filterModel;
 	ledFilterEnabled = false;
@@ -991,8 +1052,10 @@
 	calcAudioLatencyVars(audio.audioBufferSize, audio.outputRate);
 
 	resetCachedMixerPeriod();
-	resetAudioDownsamplingStates();
+	clearMixerDownsamplerStates();
 	audio.resetSyncTickTimeFlag = true;
+
+	updateFilterFunc();
 	SDL_PauseAudioDevice(dev, false);
 	return true;
 }
@@ -1028,12 +1091,12 @@
 	amigaPanFlag ^= 1;
 	if (!amigaPanFlag)
 	{
-		mixerCalcVoicePans(defStereoSep);
+		mixerSetStereoSeparation(config.stereoSeparation);
 		displayMsg("AMIGA PANNING OFF");
 	}
 	else
 	{
-		mixerCalcVoicePans(100);
+		mixerSetStereoSeparation(100);
 		displayMsg("AMIGA PANNING ON");
 	}
 
--- a/src/pt2_audio.h
+++ b/src/pt2_audio.h
@@ -8,11 +8,11 @@
 {
 	volatile bool locked, isSampling;
 
-	bool forceMixerOff;
+	bool forceSoundCardSilence;
 	
 	uint32_t outputRate, audioBufferSize;
 	int64_t tickSampleCounter64, samplesPerTick64;
-	int64_t bpmTable[256-32], bpmTable28kHz[256-32], bpmTable22kHz[256-32], bpmTableMod2Wav[256-32]; // 32.32 fixed-point
+	int64_t bpmTable[256-32], bpmTable28kHz[256-32], bpmTable20kHz[256-32]; // 32.32 fixed-point
 	double dPeriodToDeltaDiv;
 
 	// for audio sampling
@@ -29,7 +29,7 @@
 
 	const int8_t *data, *newData;
 	int32_t length, newLength, pos;
-	double dVolume, dDelta, dDeltaMul, dPhase, dLastDelta, dLastDeltaMul, dLastPhase, dPanL, dPanR;
+	double dVolume, dDelta, dDeltaMul, dPhase, dLastDelta, dLastDeltaMul, dLastPhase;
 
 	// period cache
 	int32_t oldPeriod;
@@ -58,7 +58,6 @@
 void normalizeFloatTo8Bit(float *fSampleData, uint32_t sampleLength);
 void normalizeDoubleTo8Bit(double *dSampleData, uint32_t sampleLength);
 
-void recalcFilterCoeffs(int32_t outputRate); // for MOD2WAV
 void setLEDFilter(bool state, bool doLockAudio);
 void toggleLEDFilter(void);
 void toggleAmigaPanMode(void);
@@ -74,9 +73,8 @@
 void mixerUpdateLoops(void);
 void mixerKillVoice(int32_t ch);
 void turnOffVoices(void);
-void mixerCalcVoicePans(uint8_t stereoSeparation);
+void mixerSetStereoSeparation(uint8_t percentage);
 void outputAudio(int16_t *target, int32_t numSamples);
-void resetAudioDownsamplingStates(void);
 
 extern audio_t audio; // pt2_audio.c
 extern paulaVoice_t paula[AMIGA_VOICES]; // pt2_audio.c
--- a/src/pt2_chordmaker.c
+++ b/src/pt2_chordmaker.c
@@ -164,7 +164,7 @@
 	s->text[21] = '!'; // chord sample indicator
 	s->text[22] = '\0';
 
-	memset(mixCh, 0, sizeof (mixCh));
+	memset(mixCh, 0, sizeof (mixCh)); // also clears position and frac
 
 	// setup mixing lengths and deltas
 
--- a/src/pt2_downsample2x.c
+++ b/src/pt2_downsample2x.c
@@ -3,82 +3,167 @@
 #include <crtdbg.h>
 #endif
 
+/* High-quality /2 decimator from
+** https://www.musicdsp.org/en/latest/Filters/231-hiqh-quality-2-decimators.html
+*/
+
 #include <stdint.h>
 #include <stdbool.h>
-#include "pt2_helpers.h" // CLAMP
+#include <math.h> // round()
+#include "pt2_helpers.h" // ABS()
 
-static double state[2];
+// ----------------------------------------------------------
+// reserved for main audio channel mixer, PAT2SMP and MOD2WAV
+// ----------------------------------------------------------
 
-/*
-** - all-pass halfband filters (2x downsample) -
-**
-** 8bitbubsy: Not sure who coded these. Possibly aciddose,
-** or maybe he found it on the internet somewhere...
-*/
+static double R1_L, R2_L, R3_L, R4_L, R5_L, R6_L, R7_L, R8_L, R9_L;
+static double R1_R, R2_R, R3_R, R4_R, R5_R, R6_R, R7_R, R8_R, R9_R;
 
-static double f(const double in, double *b, const double c)
+void clearMixerDownsamplerStates(void)
 {
-	const double x = (in - *b) * c;
-	const double out = *b + x;
-	*b = in + x;
+	R1_L = R2_L = R3_L = R4_L = R5_L = R6_L = R7_L = R8_L = R9_L = 0.0;
+	R1_R = R2_R = R3_R = R4_R = R5_R = R6_R = R7_R = R8_R = R9_R = 0.0;
+}
 
-	return out;
+double decimate2x_L(double x0, double x1) // 2:1 decimator step using the R1_L..R9_L state: two input samples in, one output sample out
+{
+	const double h0 =  8192.0 / 16384.0; // h0 + 2*(h1+h3+h5+h7+h9) = 16384/16384, i.e. the coefficients sum to 1.0
+	const double h1 =  5042.0 / 16384.0;
+	const double h3 = -1277.0 / 16384.0;
+	const double h5 =   429.0 / 16384.0;
+	const double h7 =  -116.0 / 16384.0;
+	const double h9 =    18.0 / 16384.0;
+
+	double h9x0 = h9*x0; // each x0 product is computed once and used twice below
+	double h7x0 = h7*x0;
+	double h5x0 = h5*x0;
+	double h3x0 = h3*x0;
+	double h1x0 = h1*x0;
+	double R10  = R9_L+h9x0; // the value returned by this call, read before the state is advanced
+
+	R9_L = R8_L+h7x0; // advance the R1_L..R9_L state by one step, adding x0's contributions
+	R8_L = R7_L+h5x0;
+	R7_L = R6_L+h3x0;
+	R6_L = R5_L+h1x0;
+	R5_L = R4_L+h1x0+h0*x1; // the only place x1 enters the state
+	R4_L = R3_L+h3x0;
+	R3_L = R2_L+h5x0;
+	R2_L = R1_L+h7x0;
+	R1_L = h9x0;
+
+	return R10;
 }
 
-double d2x(const double *input, double *b)
+double decimate2x_R(double x0, double x1) // 2:1 decimator step using the R1_R..R9_R state: two input samples in, one output sample out
 {
-	return (f(input[0], &b[0], 0.150634765625) + f(input[1], &b[1], -0.3925628662109375)) * 0.5;
+	const double h0 =  8192.0 / 16384.0; // h0 + 2*(h1+h3+h5+h7+h9) = 16384/16384, i.e. the coefficients sum to 1.0
+	const double h1 =  5042.0 / 16384.0;
+	const double h3 = -1277.0 / 16384.0;
+	const double h5 =   429.0 / 16384.0;
+	const double h7 =  -116.0 / 16384.0;
+	const double h9 =    18.0 / 16384.0;
+
+	double h9x0 = h9*x0; // each x0 product is computed once and used twice below
+	double h7x0 = h7*x0;
+	double h5x0 = h5*x0;
+	double h3x0 = h3*x0;
+	double h1x0 = h1*x0;
+	double R10  = R9_R+h9x0; // the value returned by this call, read before the state is advanced
+
+	R9_R = R8_R+h7x0; // advance the R1_R..R9_R state by one step, adding x0's contributions
+	R8_R = R7_R+h5x0;
+	R7_R = R6_R+h3x0;
+	R6_R = R5_R+h1x0;
+	R5_R = R4_R+h1x0+h0*x1; // the only place x1 enters the state
+	R4_R = R3_R+h3x0;
+	R3_R = R2_R+h5x0;
+	R2_R = R1_R+h7x0;
+	R1_R = h9x0;
+
+	return R10;
 }
 
+// ----------------------------------------------------------
+// ----------------------------------------------------------
+// ----------------------------------------------------------
+
+static double R1, R2, R3, R4, R5, R6, R7, R8, R9;
+
+static void clearDownsamplerState(void) // zeroes the R1..R9 state used by decimate2x()
+{
+	R1 = R2 = R3 = R4 = R5 = R6 = R7 = R8 = R9 = 0.0;
+}
+
+static double decimate2x(double x0, double x1) // 2:1 decimator step using the file-local R1..R9 state: two input samples in, one output sample out
+{
+	const double h0 =  8192.0 / 16384.0; // h0 + 2*(h1+h3+h5+h7+h9) = 16384/16384, i.e. the coefficients sum to 1.0
+	const double h1 =  5042.0 / 16384.0;
+	const double h3 = -1277.0 / 16384.0;
+	const double h5 =   429.0 / 16384.0;
+	const double h7 =  -116.0 / 16384.0;
+	const double h9 =    18.0 / 16384.0;
+
+	double h9x0 = h9*x0; // each x0 product is computed once and used twice below
+	double h7x0 = h7*x0;
+	double h5x0 = h5*x0;
+	double h3x0 = h3*x0;
+	double h1x0 = h1*x0;
+	double R10  = R9+h9x0; // the value returned by this call, read before the state is advanced
+
+	R9 = R8+h7x0; // advance the R1..R9 state by one step, adding x0's contributions
+	R8 = R7+h5x0;
+	R7 = R6+h3x0;
+	R6 = R5+h1x0;
+	R5 = R4+h1x0+h0*x1; // the only place x1 enters the state
+	R4 = R3+h3x0;
+	R3 = R2+h5x0;
+	R2 = R1+h7x0;
+	R1 = h9x0;
+
+	return R10;
+}
+
 // Warning: These can exceed original range because of undershoot/overshoot!
 
-void downsample2xDouble(double *buffer, int32_t originalLength)
+void downsample2xDouble(double *buffer, uint32_t originalLength)
 {
-	state[0] = state[1] = 0.0;
+	clearDownsamplerState();
 
 	const double *input = buffer;
-	const int32_t length = originalLength / 2;
-	for (int32_t i = 0; i < length; i++, input += 2)
-		buffer[i] = d2x(input, state);
+	const uint32_t length = originalLength / 2;
+	for (uint32_t i = 0; i < length; i++, input += 2)
+		buffer[i] = decimate2x(input[0], input[1]);
 }
 
-void downsample2xFloat(float *buffer, int32_t originalLength)
+void downsample2xFloat(float *buffer, uint32_t originalLength)
 {
-	double in[2];
+	clearDownsamplerState();
 
-	state[0] = state[1] = 0.0;
-
 	const float *input = buffer;
-	const int32_t length = originalLength / 2;
-	for (int32_t i = 0; i < length; i++, input += 2)
-	{
-		in[0] = input[0];
-		in[1] = input[1];
-
-		buffer[i] = (float)d2x(in, state);
-	}
+	const uint32_t length = originalLength / 2;
+	for (uint32_t i = 0; i < length; i++, input += 2)
+		buffer[i] = (float)decimate2x(input[0], input[1]);
 }
 
 // Warning: These are slow and use normalization to prevent clipping from undershoot/overshoot!
 
-bool downsample2x8BitU(uint8_t *buffer, int32_t originalLength)
+bool downsample2x8BitU(uint8_t *buffer, uint32_t originalLength)
 {
-	state[0] = state[1] = 0.0;
-
 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));
 	if (dBuffer == NULL)
 		return false;
 
-	for (int32_t i = 0; i < originalLength; i++)
+	for (uint32_t i = 0; i < originalLength; i++)
 		dBuffer[i] = (buffer[i] - 128) * (1.0 / (INT8_MAX+1.0));
 
 	const double *input = dBuffer;
 	double dPeak = 0.0;
 
-	const int32_t length = originalLength / 2;
-	for (int32_t i = 0; i < length; i++, input += 2)
+	clearDownsamplerState();
+	const uint32_t length = originalLength / 2;
+	for (uint32_t i = 0; i < length; i++, input += 2)
 	{
-		double dOut = d2x(input, state);
+		double dOut = decimate2x(input[0], input[1]);
 		dBuffer[i] = dOut;
 
 		dOut = ABS(dOut);
@@ -92,32 +177,39 @@
 	if (dPeak > 0.0)
 		dAmp = INT8_MAX / dPeak;
 
-	for (int32_t i = 0; i < length; i++)
-		buffer[i] = (uint8_t)round(dBuffer[i] * dAmp) + 128;
+	for (uint32_t i = 0; i < length; i++)
+	{
+		double dSmp = dBuffer[i] * dAmp;
 
+		// round half away from zero (faster than calling round())
+		     if (dSmp < 0.0) dSmp -= 0.5;
+		else if (dSmp > 0.0) dSmp += 0.5;
+
+		buffer[i] = (uint8_t)((int32_t)dSmp + 128); // go via signed int: negative double -> unsigned is undefined
+	}
+
 	free(dBuffer);
 
 	return true;
 }
 
-bool downsample2x8Bit(int8_t *buffer, int32_t originalLength)
+bool downsample2x8Bit(int8_t *buffer, uint32_t originalLength)
 {
-	state[0] = state[1] = 0.0;
-
 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));
 	if (dBuffer == NULL)
 		return false;
 
-	for (int32_t i = 0; i < originalLength; i++)
+	for (uint32_t i = 0; i < originalLength; i++)
 		dBuffer[i] = buffer[i] * (1.0 / (INT8_MAX+1.0));
 
 	const double *input = dBuffer;
 	double dPeak = 0.0;
 
-	const int32_t length = originalLength / 2;
-	for (int32_t i = 0; i < length; i++, input += 2)
+	clearDownsamplerState();
+	const uint32_t length = originalLength / 2;
+	for (uint32_t i = 0; i < length; i++, input += 2)
 	{
-		double dOut = d2x(input, state);
+		double dOut = decimate2x(input[0], input[1]);
 		dBuffer[i] = dOut;
 
 		dOut = ABS(dOut);
@@ -131,32 +223,39 @@
 	if (dPeak > 0.0)
 		dAmp = INT8_MAX / dPeak;
 
-	for (int32_t i = 0; i < length; i++)
-		buffer[i] = (int8_t)round(dBuffer[i] * dAmp);
+	for (uint32_t i = 0; i < length; i++)
+	{
+		double dSmp = dBuffer[i] * dAmp;
 
+		// faster than calling round()
+		     if (dSmp < 0.0) dSmp -= 0.5;
+		else if (dSmp > 0.0) dSmp += 0.5;
+
+		buffer[i] = (int8_t)dSmp;
+	}
+
 	free(dBuffer);
 
 	return true;
 }
 
-bool downsample2x16Bit(int16_t *buffer, int32_t originalLength)
+bool downsample2x16Bit(int16_t *buffer, uint32_t originalLength)
 {
-	state[0] = state[1] = 0.0;
-
 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));
 	if (dBuffer == NULL)
 		return false;
 
-	for (int32_t i = 0; i < originalLength; i++)
+	for (uint32_t i = 0; i < originalLength; i++)
 		dBuffer[i] = buffer[i] * (1.0 / (INT16_MAX+1.0));
 
 	const double *input = dBuffer;
 	double dPeak = 0.0;
 
-	const int32_t length = originalLength / 2;
-	for (int32_t i = 0; i < length; i++, input += 2)
+	clearDownsamplerState();
+	const uint32_t length = originalLength / 2;
+	for (uint32_t i = 0; i < length; i++, input += 2)
 	{
-		double dOut = d2x(input, state);
+		double dOut = decimate2x(input[0], input[1]);
 		dBuffer[i] = dOut;
 
 		dOut = ABS(dOut);
@@ -170,32 +269,39 @@
 	if (dPeak > 0.0)
 		dAmp = INT16_MAX / dPeak;
 
-	for (int32_t i = 0; i < length; i++)
-		buffer[i] = (int16_t)round(dBuffer[i] * dAmp);
+	for (uint32_t i = 0; i < length; i++)
+	{
+		double dSmp = dBuffer[i] * dAmp;
 
+		// faster than calling round()
+		     if (dSmp < 0.0) dSmp -= 0.5;
+		else if (dSmp > 0.0) dSmp += 0.5;
+
+		buffer[i] = (int16_t)dSmp;
+	}
+
 	free(dBuffer);
 
 	return true;
 }
 
-bool downsample2x32Bit(int32_t *buffer, int32_t originalLength)
+bool downsample2x32Bit(int32_t *buffer, uint32_t originalLength)
 {
-	state[0] = state[1] = 0.0;
-
 	double *dBuffer = (double *)malloc(originalLength * sizeof (double));
 	if (dBuffer == NULL)
 		return false;
 
-	for (int32_t i = 0; i < originalLength; i++)
+	for (uint32_t i = 0; i < originalLength; i++)
 		dBuffer[i] = buffer[i] * (1.0 / (INT32_MAX+1.0));
 
 	const double *input = dBuffer;
 	double dPeak = 0.0;
 
-	const int32_t length = originalLength / 2;
-	for (int32_t i = 0; i < length; i++, input += 2)
+	clearDownsamplerState();
+	const uint32_t length = originalLength / 2;
+	for (uint32_t i = 0; i < length; i++, input += 2)
 	{
-		double dOut = d2x(input, state);
+		double dOut = decimate2x(input[0], input[1]);
 		dBuffer[i] = dOut;
 
 		dOut = ABS(dOut);
@@ -209,8 +315,16 @@
 	if (dPeak > 0.0)
 		dAmp = INT32_MAX / dPeak;
 
-	for (int32_t i = 0; i < length; i++)
-		buffer[i] = (int32_t)round(dBuffer[i] * dAmp);
+	for (uint32_t i = 0; i < length; i++)
+	{
+		double dSmp = dBuffer[i] * dAmp;
+
+		// faster than calling round()
+		     if (dSmp < 0.0) dSmp -= 0.5;
+		else if (dSmp > 0.0) dSmp += 0.5;
+
+		buffer[i] = (int32_t)dSmp;
+	}
 
 	free(dBuffer);
 
--- a/src/pt2_downsamplers2x.h
+++ b/src/pt2_downsamplers2x.h
@@ -2,18 +2,20 @@
 
 #include <stdint.h>
+#include <stdbool.h>
 
-// all-pass halfband filters
+// reserved for main audio channel mixer, PAT2SMP and MOD2WAV
+void clearMixerDownsamplerStates(void);
+double decimate2x_L(double x0, double x1);
+double decimate2x_R(double x0, double x1);
+// --------------------------------------
 
-double d2x(const double *input, double *b);
-
 // Warning: These can exceed -1.0 .. 1.0 because of undershoot/overshoot!
+void downsample2xFloat(float *buffer, uint32_t originalLength);
+void downsample2xDouble(double *buffer, uint32_t originalLength);
 
-void downsample2xFloat(float *buffer, int32_t originalLength);
-void downsample2xDouble(double *buffer, int32_t originalLength);
-
 // Warning: These are slow and use normalization to prevent clipping from undershoot/overshoot!
-
-void downsample2x8Bit(int8_t *buffer, int32_t originalLength);
-void downsample2x8BitU(uint8_t *buffer, int32_t originalLength);
-void downsample2x16Bit(int16_t *buffer, int32_t originalLength);
-void downsample2x32Bit(int32_t *buffer, int32_t originalLength);
+// They return false (buffer left untouched) if the temporary work buffer can't be allocated.
+bool downsample2x8Bit(int8_t *buffer, uint32_t originalLength);
+bool downsample2x8BitU(uint8_t *buffer, uint32_t originalLength);
+bool downsample2x16Bit(int16_t *buffer, uint32_t originalLength);
+bool downsample2x32Bit(int32_t *buffer, uint32_t originalLength);
--- a/src/pt2_header.h
+++ b/src/pt2_header.h
@@ -14,7 +14,7 @@
 #include "pt2_unicode.h"
 #include "pt2_palette.h"
 
-#define PROG_VER_STR "1.32"
+#define PROG_VER_STR "1.33"
 
 #ifdef _WIN32
 #define DIR_DELIMITER '\\'
--- a/src/pt2_ledfilter.c
+++ b/src/pt2_ledfilter.c
@@ -1,8 +1,3 @@
-#include <stdint.h>
-#include <math.h>
-#include "pt2_rcfilter.h" // DENORMAL_OFFSET definition
-#include "pt2_ledfilter.h"
-
 /* aciddose:
 ** Imperfect Amiga "LED" filter implementation. This may be further improved in the future.
 ** Based upon ideas posted by mystran @ the kvraudio.com forum.
@@ -10,12 +5,14 @@
 ** This filter may not function correctly used outside the fixed-cutoff context here!
 */
 
+#include <stdint.h>
+#include "pt2_math.h"
+#include "pt2_ledfilter.h"
+
 void clearLEDFilterState(ledFilter_t *filterLED)
 {
-	filterLED->buffer[0] = 0.0; // left channel
-	filterLED->buffer[1] = 0.0;
-	filterLED->buffer[2] = 0.0; // right channel
-	filterLED->buffer[3] = 0.0;
+	filterLED->buffer[0] = filterLED->buffer[1] = 0.0; // left channel
+	filterLED->buffer[2] = filterLED->buffer[3] = 0.0; // right channel
 }
 
 static double sigmoid(double x, double coefficient)
@@ -30,14 +27,8 @@
 
 void calcLEDFilterCoeffs(const double sr, const double hz, const double fb, ledFilter_t *filter)
 {
-	/* aciddose:
-	** tan() may produce NaN or other bad results in some cases!
-	** It appears to work correctly with these specific coefficients.
-	*/
-
-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted
-
-	const double c = (hz < (sr / 2.0)) ? tan((pi * hz) / sr) : 1.0;
+	// 8bitbubsy: the tangent approximation is suitable for these input ranges
+	const double c = (hz < sr/2.0) ? pt2_tan((PT2_PI * hz) / sr) : 1.0;
 	const double g = 1.0 / (1.0 + c);
 
 	// aciddose: dirty compensation
--- a/src/pt2_main.c
+++ b/src/pt2_main.c
@@ -233,7 +233,7 @@
 	makeSureDirIsProgramDir();
 #endif
 
-	if (!initializeVars())
+	if (!initializeVars() || !initKaiserTable())
 	{
 		cleanUp();
 		SDL_Quit();
@@ -899,6 +899,7 @@
 	videoClose();
 	freeSprites();
 	freeAudioDeviceList(); // pt2_sampling.c
+	freeKaiserTable(); // pt2_sampling.c
 
 	if (config.defModulesDir != NULL) free(config.defModulesDir);
 	if (config.defSamplesDir != NULL) free(config.defSamplesDir);
--- /dev/null
+++ b/src/pt2_math.c
@@ -1,0 +1,92 @@
+/* Quite accurate approximation routines for sin/cos/sqrt/tan.
+** These should not be used in realtime, as they are too slow.
+*/
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <math.h>
+#include "pt2_math.h"
+
+static const double pi = PT2_PI;
+static const double twopi = PT2_TWO_PI;
+
+static const double four_over_pi = 4.0 / PT2_PI;
+static const double threehalfpi = (3.0 * PT2_PI) / 2.0;
+static const double halfpi = PT2_PI / 2.0;
+
+static double tan_14s(double x)
+{
+	const double c1 = -34287.4662577359568109624;
+	const double c2 =   2566.7175462315050423295;
+	const double c3 = -   26.5366371951731325438;
+	const double c4 = -43656.1579281292375769579;
+	const double c5 =  12244.4839556747426927793;
+	const double c6 = -  336.611376245464339493;
+
+	double x2 = x * x;
+	return x*(c1 + x2*(c2 + x2*c3))/(c4 + x2*(c5 + x2*(c6 + x2)));
+}
+
+double pt2_tan(double x) // tan(x): rational approximation, argument folded into eight pi/4 octants
+{
+	if (x < 0.0) return -pt2_tan(-x); // tan is odd; a negative octant would hit 'default' unreduced
+	x = fmod(x, twopi);
+	const int32_t octant = (int32_t)(x * four_over_pi);
+	switch (octant)
+	{
+		default:
+		case 0: return      tan_14s(x               * four_over_pi);
+		case 1: return  1.0/tan_14s((halfpi-x)      * four_over_pi);
+		case 2: return -1.0/tan_14s((x-halfpi)      * four_over_pi);
+		case 3: return     -tan_14s((pi-x)          * four_over_pi);
+		case 4: return      tan_14s((x-pi)          * four_over_pi);
+		case 5: return  1.0/tan_14s((threehalfpi-x) * four_over_pi);
+		case 6: return -1.0/tan_14s((x-threehalfpi) * four_over_pi);
+		case 7: return     -tan_14s((twopi-x)       * four_over_pi);
+	}
+}
+
+double pt2_sqrt(double x) // Newton (Babylonian) square root; returns NaN for x < 0 or NaN input
+{
+	if (!(x > 0.0) || x*0.5 == x) // zero, negative, NaN, +inf: the loop below would never settle
+		return (x < 0.0 || x != x) ? NAN : x;
+
+	double s = x / 2.5, old = 0.0; // x/2.5 is only the starting guess
+	for (int32_t i = 0; i < 4096 && s != old; i++) // capped in case rounding alternates between two values
+	{
+		old = s;
+		s = (x / old + old) / 2.0;
+	}
+
+	return s;
+}
+
+static double cosTaylorSeries(double x)
+{
+#define ITERATIONS 32 /* good enough... */
+
+	x = fmod(x, twopi);
+	if (x < 0.0)
+		x = -x;
+
+	double tmp = 1.0;
+	double sum = 1.0;
+
+	for (double i = 2.0; i <= ITERATIONS*2.0; i += 2.0)
+	{
+		tmp *= -(x*x) / (i * (i-1.0));
+		sum += tmp;
+	}
+
+	return sum;
+}
+
+double pt2_cos(double x)
+{
+	return cosTaylorSeries(x);
+}
+
+double pt2_sin(double x)
+{
+	return cosTaylorSeries(halfpi-x);
+}
--- /dev/null
+++ b/src/pt2_math.h
@@ -1,0 +1,15 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+// adding this prevents denormalized numbers, which is slow
+#define DENORMAL_OFFSET 1e-20
+
+#define PT2_PI 3.14159265358979323846264338327950288
+#define PT2_TWO_PI 6.28318530717958647692528676655900576
+
+double pt2_sqrt(double x);
+double pt2_tan(double x);
+double pt2_cos(double x);
+double pt2_sin(double x);
--- a/src/pt2_mod2wav.c
+++ b/src/pt2_mod2wav.c
@@ -14,6 +14,7 @@
 #include "pt2_visuals.h"
 #include "pt2_mod2wav.h"
 #include "pt2_structs.h"
+#include "pt2_downsamplers2x.h"
 
 #define TICKS_PER_RENDER_CHUNK 64
 
@@ -36,13 +37,12 @@
 	// skip wav header place, render data first
 	fseek(f, sizeof (wavHeader_t), SEEK_SET);
 
-	if (MOD2WAV_FREQ != audio.outputRate)
-		recalcFilterCoeffs(MOD2WAV_FREQ);
-
 	uint32_t sampleCounter = 0;
 	uint8_t tickCounter = 8;
 	int64_t tickSampleCounter64 = 0;
 
+	clearMixerDownsamplerStates();
+
 	bool renderDone = false;
 	while (!renderDone)
 	{
@@ -89,9 +89,6 @@
 			fwrite(mod2WavBuffer, sizeof (int16_t), samplesInChunk, f);
 	}
 
-	if (MOD2WAV_FREQ != audio.outputRate)
-		recalcFilterCoeffs(audio.outputRate);
-
 	free(mod2WavBuffer);
 
 	if (sampleCounter & 1)
@@ -109,7 +106,7 @@
 	wavHeader.subchunk1Size = 16;
 	wavHeader.audioFormat = 1;
 	wavHeader.numChannels = 2;
-	wavHeader.sampleRate = MOD2WAV_FREQ;
+	wavHeader.sampleRate = audio.outputRate;
 	wavHeader.bitsPerSample = 16;
 	wavHeader.byteRate = (wavHeader.sampleRate * wavHeader.numChannels * wavHeader.bitsPerSample) / 8;
 	wavHeader.blockAlign = (wavHeader.numChannels * wavHeader.bitsPerSample) / 8;
@@ -120,7 +117,7 @@
 	fwrite(&wavHeader, sizeof (wavHeader_t), 1, f);
 	fclose(f);
 
-	resetAudioDownsamplingStates();
+	clearMixerDownsamplerStates();
 
 	ui.mod2WavFinished = true;
 	ui.updateMod2WavDialog = true;
@@ -164,7 +161,7 @@
 	}
 
 	const int32_t lowestBPM = 32;
-	const int64_t maxSamplesToMix64 = audio.bpmTableMod2Wav[lowestBPM-32];
+	const int64_t maxSamplesToMix64 = audio.bpmTable[lowestBPM-32];
 	const int32_t maxSamplesToMix = ((TICKS_PER_RENDER_CHUNK * maxSamplesToMix64) + (1LL << 31)) >> 32; // ceil (rounded upwards)
 
 	mod2WavBuffer = (int16_t *)malloc(maxSamplesToMix * (2 * sizeof (int16_t)));
--- a/src/pt2_mod2wav.h
+++ b/src/pt2_mod2wav.h
@@ -2,6 +2,4 @@
 
 #include <stdbool.h>
 
-#define MOD2WAV_FREQ 96000
-
 bool renderToWav(char *fileName, bool checkIfFileExist);
--- a/src/pt2_pat2smp.c
+++ b/src/pt2_pat2smp.c
@@ -31,7 +31,7 @@
 		return;
 	}
 
-	editor.dPat2SmpBuf = (double *)malloc((MAX_SAMPLE_LEN*2) * sizeof (double));
+	editor.dPat2SmpBuf = (double *)malloc(MAX_SAMPLE_LEN * sizeof (double));
 	if (editor.dPat2SmpBuf == NULL)
 	{
 		statusOutOfMemory();
@@ -54,6 +54,8 @@
 
 	int64_t tickSampleCounter64 = 0;
 
+	clearMixerDownsamplerStates();
+
 	editor.smpRenderingDone = false;
 	while (!editor.smpRenderingDone && editor.songPlaying)
 	{
@@ -71,8 +73,7 @@
 	}
 	editor.isSMPRendering = false;
 
-	//const double dSamplesPerTick = audio.samplesPerTick64 / (UINT32_MAX+1.0);
-
+	clearMixerDownsamplerStates();
 	resetSong();
 
 	int32_t renderLength = editor.pat2SmpPos;
@@ -82,20 +83,13 @@
 	// set back old row
 	song->currRow = song->row = oldRow;
 
-	// downsample oversampled buffer, normalize and quantize to 8-bit
+	// normalize and quantize to 8-bit
 
-	downsample2xDouble(editor.dPat2SmpBuf, renderLength);
-	renderLength /= 2;
-
 	double dAmp = 1.0;
 	const double dPeak = getDoublePeak(editor.dPat2SmpBuf, renderLength);
 	if (dPeak > 0.0)
 		dAmp = INT8_MAX / dPeak;
 
-	double dVol = 64.0 * dPeak;
-	if (dVol > 64.0)
-		dVol = 64.0;
-
 	int8_t *smpPtr = &song->sampleData[s->offset];
 	for (int32_t i = 0; i < renderLength; i++)
 	{
@@ -122,7 +116,7 @@
 	}
 
 	s->length = (uint16_t)renderLength;
-	s->volume = (int8_t)round(dVol);
+	s->volume = 64;
 	s->loopStart = 0;
 	s->loopLength = 2;
 
--- a/src/pt2_pat2smp.h
+++ b/src/pt2_pat2smp.h
@@ -2,12 +2,10 @@
 
 #include "pt2_header.h"
 
-// we do 2x oversampling for BLEP synthesis to work right on all ProTracker pitches
-
 #define PAT2SMP_HI_PERIOD 124 /* A-3 finetune +4, 28603.99Hz */
 #define PAT2SMP_LO_PERIOD 170 /* E-3 finetune  0, 20864.08Hz */
 
-#define PAT2SMP_HI_FREQ (PAULA_PAL_CLK / (PAT2SMP_HI_PERIOD / 2.0))
-#define PAT2SMP_LO_FREQ (PAULA_PAL_CLK / (PAT2SMP_LO_PERIOD / 2.0))
+#define PAT2SMP_HI_FREQ ((double)PAULA_PAL_CLK / PAT2SMP_HI_PERIOD)
+#define PAT2SMP_LO_FREQ ((double)PAULA_PAL_CLK / PAT2SMP_LO_PERIOD)
 
 void doPat2Smp(void);
--- a/src/pt2_rcfilter.c
+++ b/src/pt2_rcfilter.c
@@ -1,73 +1,59 @@
-// 1-pole 6dB/oct RC filters, code by aciddose (I think?)
+/* 1-pole 6dB/oct RC filters, from:
+** https://www.musicdsp.org/en/latest/Filters/116-one-pole-lp-and-hp.html
+**
+** There's no frequency pre-warping with tan(), but doing that would
+** result in a cutoff that sounded slightly too low.
+*/
 
-
 #include <stdint.h>
-#include <math.h>
+#include "pt2_math.h"
 #include "pt2_rcfilter.h"
 
-void calcRCFilterCoeffs(double dSr, double dHz, rcFilter_t *f)
+void calcRCFilterCoeffs(double sr, double hz, rcFilter_t *f)
 {
-	const double pi = 4.0 * atan(1.0); // M_PI can not be trusted
+	const double a = (hz < sr/2.0) ? pt2_cos((PT2_TWO_PI * hz) / sr) : 1.0;
+	const double b = 2.0 - a;
+	const double c = b - pt2_sqrt((b*b)-1.0);
 
-	const double c = (dHz < (dSr / 2.0)) ? tan((pi * dHz) / dSr) : 1.0;
-	f->c = c;
-	f->c2 = f->c * 2.0;
-	f->g = 1.0 / (1.0 + f->c);
-	f->cg = f->c * f->g;
+	f->c1 = 1.0 - c;
+	f->c2 = c;
 }
 
 void clearRCFilterState(rcFilter_t *f)
 {
-	f->buffer[0] = 0.0; // left channel
-	f->buffer[1] = 0.0; // right channel
+	f->tmp[0] = f->tmp[1] = 0.0;
 }
 
-// aciddose: input 0 is resistor side of capacitor (low-pass), input 1 is reference side (high-pass)
-static inline double getLowpassOutput(rcFilter_t *f, const double input_0, const double input_1, const double buffer)
-{
-	double dOutput = DENORMAL_OFFSET;
-
-	dOutput += buffer * f->g + input_0 * f->cg + input_1 * (1.0 - f->cg);
-
-	return dOutput;
-}
-
 void RCLowPassFilterStereo(rcFilter_t *f, const double *in, double *out)
 {
-	double output;
+	// left channel
+	f->tmp[0] = (f->c1*in[0] + f->c2*f->tmp[0]) + DENORMAL_OFFSET;
+	out[0] = f->tmp[0];
 
-	// left channel RC low-pass
-	output = getLowpassOutput(f, in[0], 0.0, f->buffer[0]);
-	f->buffer[0] += (in[0] - output) * f->c2;
-	out[0] = output;
-
-	// right channel RC low-pass
-	output = getLowpassOutput(f, in[1], 0.0, f->buffer[1]);
-	f->buffer[1] += (in[1] - output) * f->c2;
-	out[1] = output;
+	// right channel
+	f->tmp[1] = (f->c1*in[1] + f->c2*f->tmp[1]) + DENORMAL_OFFSET;
+	out[1] = f->tmp[1];
 }
 
 void RCHighPassFilterStereo(rcFilter_t *f, const double *in, double *out)
 {
-	double low[2];
+	// left channel
+	f->tmp[0] = (f->c1*in[0] + f->c2*f->tmp[0]) + DENORMAL_OFFSET;
+	out[0] = in[0]-f->tmp[0];
 
-	RCLowPassFilterStereo(f, in, low);
-
-	out[0] = in[0] - low[0]; // left channel high-pass
-	out[1] = in[1] - low[1]; // right channel high-pass
+	// right channel
+	f->tmp[1] = (f->c1*in[1] + f->c2*f->tmp[1]) + DENORMAL_OFFSET;
+	out[1] = in[1]-f->tmp[1];
 }
 
 void RCLowPassFilter(rcFilter_t *f, const double in, double *out)
 {
-	double output = getLowpassOutput(f, in, 0.0, f->buffer[0]);
-	f->buffer[0] += (in - output) * f->c2;
-	*out = output;
+	f->tmp[0] = (f->c1*in + f->c2*f->tmp[0]) + DENORMAL_OFFSET;
+	*out = f->tmp[0];
 }
 
 void RCHighPassFilter(rcFilter_t *f, const double in, double *out)
 {
-	double low;
-
-	RCLowPassFilter(f, in, &low);
-	*out = in - low; // high-pass
+	f->tmp[0] = (f->c1*in + f->c2*f->tmp[0]) + DENORMAL_OFFSET;
+	*out = in-f->tmp[0];
 }
--- a/src/pt2_rcfilter.h
+++ b/src/pt2_rcfilter.h
@@ -3,19 +3,14 @@
 #include <stdint.h>
 #include <stdbool.h>
 
-// adding this prevents denormalized numbers, which is slow
-#define DENORMAL_OFFSET 1e-15
-
 typedef struct rcFilter_t
 {
-	double buffer[2];
-	double c, c2, g, cg;
+	double tmp[2], c1, c2;
 } rcFilter_t;
 
-void calcRCFilterCoeffs(const double sr, const double hz, rcFilter_t *f);
+void calcRCFilterCoeffs(double sr, double hz, rcFilter_t *f);
 void clearRCFilterState(rcFilter_t *f);
 void RCLowPassFilterStereo(rcFilter_t *f, const double *in, double *out);
 void RCHighPassFilterStereo(rcFilter_t *f, const double *in, double *out);
 void RCLowPassFilter(rcFilter_t *f, const double in, double *out);
 void RCHighPassFilter(rcFilter_t *f, const double in, double *out);
-
--- a/src/pt2_replayer.c
+++ b/src/pt2_replayer.c
@@ -1206,9 +1206,7 @@
 
 	int64_t samplesPerTick64;
 	if (editor.isSMPRendering)
-		samplesPerTick64 = editor.pat2SmpHQ ? audio.bpmTable28kHz[bpm] : audio.bpmTable22kHz[bpm];
-	else if (editor.isWAVRendering)
-		samplesPerTick64 = audio.bpmTableMod2Wav[bpm];
+		samplesPerTick64 = editor.pat2SmpHQ ? audio.bpmTable28kHz[bpm] : audio.bpmTable20kHz[bpm];
 	else
 		samplesPerTick64 = audio.bpmTable[bpm];
 
@@ -1516,7 +1514,7 @@
 
 	editor.playMode = PLAY_MODE_NORMAL;
 	editor.blockMarkFlag = false;
-	audio.forceMixerOff = true;
+	audio.forceSoundCardSilence = true;
 
 	song->row = 0;
 	song->currRow = 0;
@@ -1550,9 +1548,9 @@
 
 	turnOffVoices();
 
-	memset((int8_t *)editor.vuMeterVolumes,0, sizeof (editor.vuMeterVolumes));
+	memset((int8_t *)editor.vuMeterVolumes,     0, sizeof (editor.vuMeterVolumes));
 	memset((int8_t *)editor.realVuMeterVolumes, 0, sizeof (editor.realVuMeterVolumes));
-	memset((int8_t *)editor.spectrumVolumes, 0, sizeof (editor.spectrumVolumes));
+	memset((int8_t *)editor.spectrumVolumes,    0, sizeof (editor.spectrumVolumes));
 
 	memset(song->channels, 0, sizeof (song->channels));
 	for (uint8_t i = 0; i < AMIGA_VOICES; i++)
@@ -1579,5 +1577,5 @@
 
 	song->tick = 0;
 	modRenderDone = false;
-	audio.forceMixerOff = false;
+	audio.forceSoundCardSilence = false;
 }
--- a/src/pt2_sampling.c
+++ b/src/pt2_sampling.c
@@ -1,6 +1,10 @@
 /* Experimental audio sampling support.
 ** There may be several bad practices here, as I don't really
 ** have the proper knowledge on this stuff.
+**
+** Some functions like sin() may be different depending on
+** math library implementation, but we don't use pt_math.c
+** replacements for speed reasons.
 */
 
 // for finding memory leaks in debug mode with Visual Studio 
@@ -24,6 +28,7 @@
 #include "pt2_tables.h"
 #include "pt2_config.h"
 #include "pt2_sampling.h"
+#include "pt2_math.h" // PT2_PI
 
 enum
 {
@@ -39,6 +44,7 @@
 #define SINC_TAPS 64
 #define SINC_TAPS_BITS 6 /* log2(SINC_TAPS) */
 #define SINC_PHASES 4096
+#define MID_TAP ((SINC_TAPS/2)*SINC_PHASES)
 
 #define SAMPLE_PREVIEW_WITDH 194
 #define SAMPLE_PREVIEW_HEIGHT 38
@@ -53,7 +59,7 @@
 static int32_t samplingMode = SAMPLE_MIX, inputFrequency, roundedOutputFrequency;
 static int32_t numAudioInputDevs, audioInputDevListOffset, selectedDev;
 static int32_t bytesSampled, maxSamplingLength, inputBufferSize;
-static double dOutputFrequency, *dSincTable, *dSamplingBuffer, *dSamplingBufferOrig;
+static double dOutputFrequency, *dSincTable, *dKaiserTable, *dSamplingBuffer, *dSamplingBufferOrig;
 static SDL_AudioDeviceID recordDev;
 
 /*
@@ -65,6 +71,7 @@
 static double Izero(double y) // Compute Bessel function Izero(y) using a series approximation
 {
 	double s = 1.0, ds = 1.0, d = 0.0;
+	const double epsilon = 1E-9; // 8bb: 1E-7 -> 1E-9 for added precision (still fast to calculate)
 
 	do
 	{
@@ -72,26 +79,66 @@
 		ds = ds * (y * y) / (d * d);
 		s = s + ds;
 	}
-	while (ds > 1E-7 * s);
+	while (ds > epsilon * s);
 
 	return s;
 }
 
-static bool initSincTable(double cutoff)
+bool initKaiserTable(void) // called once on tracker init
 {
-	if (cutoff > 0.999)
-		cutoff = 0.999;
+	dKaiserTable = (double *)malloc(SINC_TAPS * SINC_PHASES * sizeof (double));
+	if (dKaiserTable == NULL)
+	{
+		showErrorMsgBox("Out of memory!");
+		return false;
+	}
 
+	const double beta = 9.6377;
+	const double izeroBeta = Izero(beta);
+
+	for (int32_t i = 0; i < SINC_TAPS*SINC_PHASES; i++)
+	{
+		double fkaiser;
+		int32_t ix = (SINC_TAPS-1) - (i & (SINC_TAPS-1));
+
+		ix = (ix * SINC_PHASES) + (i >> SINC_TAPS_BITS);
+		if (ix == MID_TAP)
+		{
+			fkaiser = 1.0;
+		}
+		else
+		{
+			const double x = (ix - MID_TAP) * (1.0 / SINC_PHASES);
+			const double xMul = 1.0 / ((SINC_TAPS/2) * (SINC_TAPS/2));
+			fkaiser = Izero(beta * sqrt(1.0 - x * x * xMul)) / izeroBeta;
+		}
+
+		dKaiserTable[i] = fkaiser;
+	}
+
+	return true;
+}
+
+void freeKaiserTable(void)
+{
+	if (dKaiserTable != NULL)
+	{
+		free(dKaiserTable);
+		dKaiserTable = NULL;
+	}
+}
+
+// calculated after completion of sampling (before downsampling)
+static bool initSincTable(double cutoff)
+{
 	dSincTable = (double *)malloc(SINC_TAPS * SINC_PHASES * sizeof (double));
 	if (dSincTable == NULL)
 		return false;
 
-	const double beta = 9.6377; // this value can maybe be tweaked (we do downsampling only)
-	const double izeroBeta = Izero(beta);
-	const double kPi = 4.0 * atan(1.0) * cutoff;
+	if (cutoff > 1.0)
+		cutoff = 1.0;
 
-#define MID_TAP ((SINC_TAPS/2)*SINC_PHASES)
-
+	const double kPi = PT2_PI * cutoff;
 	for (int32_t i = 0; i < SINC_TAPS*SINC_PHASES; i++)
 	{
 		double fsinc;
@@ -107,8 +154,7 @@
 			const double x = (ix - MID_TAP) * (1.0 / SINC_PHASES);
 			const double xPi = x * kPi;
 
-			const double xMul = 1.0 / ((SINC_TAPS/2) * (SINC_TAPS/2));
-			fsinc = sin(xPi) * Izero(beta * sqrt(1.0 - x * x * xMul)) / (izeroBeta * xPi); // Kaiser window
+			fsinc = (sin(xPi) / xPi) * dKaiserTable[i];
 		}
 
 		dSincTable[i] = fsinc * cutoff;
@@ -151,7 +197,7 @@
 		samplingNote = 35;
 
 	int32_t period = periodTable[((samplingFinetune & 0xF) * 37) + samplingNote];
-	if (period < 113) // this happens internally in our Paula mixer
+	if (period < 113) // also happens in our "set period" Paula function
 		period = 113;
 
 	dOutputFrequency = (double)PAULA_PAL_CLK / period;
@@ -167,7 +213,7 @@
 		if (len > SAMPLING_BUFFER_SIZE)
 			len = SAMPLING_BUFFER_SIZE;
 
-		const int16_t *L = (int16_t *)stream;
+		const int16_t *L =  (int16_t *)stream;
 		const int16_t *R = ((int16_t *)stream) + 1;
 
 		int16_t *dst16 = displayBuffer;
@@ -287,8 +333,6 @@
 		return;
 
 	listAudioDevices();
-	changeStatusText("PLEASE WAIT ...");
-	flipFrame();
 
 	stopInputAudio();
 	selectedDev = dev;
@@ -432,6 +476,9 @@
 
 void renderSamplingBox(void)
 {
+	changeStatusText("PLEASE WAIT ...");
+	flipFrame();
+
 	editor.sampleZero = false;
 	editor.blockMarkFlag = false;
 
@@ -459,8 +506,8 @@
 	selectAudioDevice(selectedDev);
 
 	showCurrSample();
-
 	modStop();
+
 	editor.songPlaying = false;
 	editor.playMode = PLAY_MODE_NORMAL;
 	editor.currMode = MODE_IDLE;
@@ -557,9 +604,9 @@
 
 	assert(roundedOutputFrequency > 0);
 
-	maxSamplingLength = (int32_t)(ceil((65534.0*inputFrequency) / dOutputFrequency)) + 1;
+	maxSamplingLength = (int32_t)(ceil(((double)MAX_SAMPLE_LEN*inputFrequency) / dOutputFrequency)) + 1;
 	
-	int32_t allocLen = (SINC_TAPS/2) + maxSamplingLength + (SINC_TAPS/2);
+	const int32_t allocLen = (SINC_TAPS/2) + maxSamplingLength + (SINC_TAPS/2);
 	dSamplingBufferOrig = (double *)malloc(allocLen * sizeof (double));
 	if (dSamplingBufferOrig == NULL)
 	{
@@ -568,7 +615,7 @@
 	}
 	dSamplingBuffer = dSamplingBufferOrig + (SINC_TAPS/2); // allow negative look-up for sinc taps
 
-	// clear tap area
+	// clear tap area before sample
 	memset(dSamplingBufferOrig, 0, (SINC_TAPS/2) * sizeof (double));
 
 	bytesSampled = 0;
@@ -583,7 +630,7 @@
 
 static int32_t downsampleSamplingBuffer(void)
 {
-	// clear tap area
+	// clear tap area after sample
 	memset(&dSamplingBuffer[bytesSampled], 0, (SINC_TAPS/2) * sizeof (double));
 
 	const int32_t readLength = bytesSampled;
@@ -602,7 +649,6 @@
 
 	if (!initSincTable(dRatio))
 	{
-		free(dBuffer);
 		statusOutOfMemory();
 		return -1;
 	}
@@ -612,7 +658,7 @@
 	int8_t *output = &song->sampleData[song->samples[editor.currSample].offset];
 	const double dDelta = inputFrequency / dOutputFrequency;
 
-	// pre-centered (this is safe, look at how fSamplingBufferOrig is alloc'd)
+	// pre-centered (this is safe, look at how dSamplingBufferOrig is alloc'd)
 	const double *dSmpPtr = &dSamplingBuffer[-((SINC_TAPS/2)-1)];
 
 	double dPhase = 0.0;
@@ -622,12 +668,15 @@
 		double dSmp = sinc(dSmpPtr, dPhase);
 		dBuffer[i] = dSmp;
 
-		dSmp = fabs(dSmp);
+		// dSmp = fabs(dSmp)
+		if (dSmp < 0.0)
+			dSmp = -dSmp;
+
 		if (dSmp > dPeakAmp)
 			dPeakAmp = dSmp;
 
 		dPhase += dDelta;
-		const int32_t wholeSamples = (const int32_t)dPhase;
+		const int32_t wholeSamples = (int32_t)dPhase;
 		dPhase -= wholeSamples;
 		dSmpPtr += wholeSamples;
 	}
@@ -647,8 +696,13 @@
 
 	for (int32_t i = 0; i < writeLength; i++)
 	{
-		const int32_t smp32 = (const int32_t)round(dBuffer[i] * dAmp);
-		assert(smp32 >= -128 && smp32 <= 127); // shouldn't happen according to dAmp (but just in case)
+		double dSmp = dBuffer[i] * dAmp;
+
+		// faster than calling round()
+		     if (dSmp < 0.0) dSmp -= 0.5;
+		else if (dSmp > 0.0) dSmp += 0.5;
+		const int32_t smp32 = (int32_t)dSmp; // rounded
+
 		output[i] = (int8_t)smp32;
 	}
 
--- a/src/pt2_sampling.h
+++ b/src/pt2_sampling.h
@@ -3,6 +3,9 @@
 #include <stdint.h>
 #include <stdbool.h>
 
+bool initKaiserTable(void); // called once on tracker init
+void freeKaiserTable(void);
+
 void stopSampling(void);
 void freeAudioDeviceList(void);
 void renderSampleMonitor(void);
--- a/vs2019_project/pt2-clone/pt2-clone.vcxproj
+++ b/vs2019_project/pt2-clone/pt2-clone.vcxproj
@@ -97,7 +97,6 @@
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <FloatingPointModel>Fast</FloatingPointModel>
       <DebugInformationFormat>None</DebugInformationFormat>
       <OmitFramePointers>true</OmitFramePointers>
       <CompileAsWinRT>false</CompileAsWinRT>
@@ -143,7 +142,6 @@
       <BasicRuntimeChecks>Default</BasicRuntimeChecks>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <FloatingPointModel>Fast</FloatingPointModel>
       <DebugInformationFormat>None</DebugInformationFormat>
       <OmitFramePointers>true</OmitFramePointers>
       <BufferSecurityCheck>false</BufferSecurityCheck>
@@ -178,7 +176,6 @@
     <ClCompile>
       <WarningLevel>Level4</WarningLevel>
       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;DEBUG;_DEBUG;WIN32;_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
-      <FloatingPointModel>Fast</FloatingPointModel>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
       <TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
@@ -207,7 +204,6 @@
     <ClCompile>
       <WarningLevel>Level4</WarningLevel>
       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;DEBUG;_DEBUG;WIN32;_CRT_SECURE_NO_WARNINGS</PreprocessorDefinitions>
-      <FloatingPointModel>Fast</FloatingPointModel>
       <OmitFramePointers>false</OmitFramePointers>
       <TreatWChar_tAsBuiltInType>false</TreatWChar_tAsBuiltInType>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
@@ -252,6 +248,7 @@
     <ClInclude Include="..\..\src\pt2_helpers.h" />
     <ClInclude Include="..\..\src\pt2_keyboard.h" />
     <ClInclude Include="..\..\src\pt2_ledfilter.h" />
+    <ClInclude Include="..\..\src\pt2_math.h" />
     <ClInclude Include="..\..\src\pt2_mod2wav.h" />
     <ClInclude Include="..\..\src\pt2_module_loader.h" />
     <ClInclude Include="..\..\src\pt2_module_saver.h" />
@@ -305,6 +302,7 @@
     <ClCompile Include="..\..\src\pt2_keyboard.c" />
     <ClCompile Include="..\..\src\pt2_ledfilter.c" />
     <ClCompile Include="..\..\src\pt2_main.c" />
+    <ClCompile Include="..\..\src\pt2_math.c" />
     <ClCompile Include="..\..\src\pt2_mod2wav.c" />
     <ClCompile Include="..\..\src\pt2_module_loader.c" />
     <ClCompile Include="..\..\src\pt2_rcfilter.c" />
--- a/vs2019_project/pt2-clone/pt2-clone.vcxproj.filters
+++ b/vs2019_project/pt2-clone/pt2-clone.vcxproj.filters
@@ -102,6 +102,9 @@
     <ClInclude Include="..\..\src\pt2_downsamplers2x.h">
       <Filter>headers</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\src\pt2_math.h">
+      <Filter>headers</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\..\src\pt2_audio.c" />
@@ -190,6 +193,7 @@
     <ClCompile Include="..\..\src\pt2_ledfilter.c" />
     <ClCompile Include="..\..\src\pt2_chordmaker.c" />
     <ClCompile Include="..\..\src\pt2_downsample2x.c" />
+    <ClCompile Include="..\..\src\pt2_math.c" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="..\..\src\pt2-clone.rc" />