ref: 3e12cfe8db14dd9b92e10a8980b20933ba5c150e
dir: /src/pt2_audio.c/
// for finding memory leaks in debug mode with Visual Studio 
#if defined _DEBUG && defined _MSC_VER
#include <crtdbg.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <SDL2/SDL.h>
#ifdef _WIN32
#include <io.h>
#else
#include <unistd.h>
#endif
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <limits.h>
#include "pt2_math.h"
#include "pt2_audio.h"
#include "pt2_header.h"
#include "pt2_helpers.h"
#include "pt2_blep.h"
#include "pt2_config.h"
#include "pt2_tables.h"
#include "pt2_textout.h"
#include "pt2_visuals.h"
#include "pt2_scopes.h"
#include "pt2_mod2wav.h"
#include "pt2_pat2smp.h"
#include "pt2_sync.h"
#include "pt2_structs.h"
#include "pt2_rcfilter.h"
#include "pt2_ledfilter.h"
#include "pt2_downsample2x.h"
#include "pt2_hpc.h"
#define STEREO_NORM_FACTOR 0.5 /* cumulative mid/side normalization factor (1/sqrt(2))*(1/sqrt(2)) */
#define INITIAL_DITHER_SEED 0x12345000
static volatile bool ledFilterEnabled;
static volatile uint8_t filterModel;
static bool amigaPanFlag, useA1200LowPassFilter;
static int32_t randSeed = INITIAL_DITHER_SEED, stereoSeparation = 100;
static uint32_t audLatencyPerfValInt, audLatencyPerfValFrac;
static uint64_t tickTime64, tickTime64Frac;
static double *dMixBufferL, *dMixBufferR, *dMixBufferLUnaligned, *dMixBufferRUnaligned;
static double dPrngStateL, dPrngStateR, dSideFactor;
static blep_t blep[AMIGA_VOICES];
static rcFilter_t filterLoA500, filterHiA500, filterLoA1200, filterHiA1200;
static ledFilter_t filterLED;
static SDL_AudioDeviceID dev;
static void processFiltersA1200_NoLED(int32_t numSamples);
static void processFiltersA1200_LED(int32_t numSamples);
static void processFiltersA500_NoLED(int32_t numSamples);
static void processFiltersA500_LED(int32_t numSamples);
static void (*processFiltersFunc)(int32_t);
// for audio/video syncing
static uint32_t tickTimeLen, tickTimeLenFrac;
// globalized
audio_t audio;
paulaVoice_t paula[AMIGA_VOICES];
bool intMusic(void); // defined in pt2_replayer.c
static void updateFilterFunc(void)
{
	if (filterModel == FILTERMODEL_A500)
	{
		if (ledFilterEnabled)
			processFiltersFunc = processFiltersA500_LED;
		else
			processFiltersFunc = processFiltersA500_NoLED;
	}
	else // A1200
	{
		if (ledFilterEnabled)
			processFiltersFunc = processFiltersA1200_LED;
		else
			processFiltersFunc = processFiltersA1200_NoLED;
	}
}
void setLEDFilter(bool state, bool doLockAudio)
{
	if (ledFilterEnabled == state)
		return; // same state as before!
	const bool audioWasntLocked = !audio.locked;
	if (doLockAudio && audioWasntLocked)
		lockAudio();
	clearLEDFilterState(&filterLED);
	editor.useLEDFilter = state;
	ledFilterEnabled = editor.useLEDFilter;
	updateFilterFunc();
	if (doLockAudio && audioWasntLocked)
		unlockAudio();
}
void toggleLEDFilter(void)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	clearLEDFilterState(&filterLED);
	editor.useLEDFilter ^= 1;
	ledFilterEnabled = editor.useLEDFilter;
	updateFilterFunc();
	if (audioWasntLocked)
		unlockAudio();
}
static void calcAudioLatencyVars(int32_t audioBufferSize, int32_t audioFreq)
{
	double dInt, dFrac;
	if (audioFreq == 0)
		return;
	const double dAudioLatencySecs = audioBufferSize / (double)audioFreq;
	dFrac = modf(dAudioLatencySecs * hpcFreq.dFreq, &dInt);
	// integer part
	audLatencyPerfValInt = (uint32_t)dInt;
	// fractional part (scaled to 0..2^32-1)
	audLatencyPerfValFrac = (uint32_t)((dFrac * (UINT32_MAX+1.0)) + 0.5); // rounded
}
void setSyncTickTimeLen(uint32_t timeLen, uint32_t timeLenFrac)
{
	tickTimeLen = timeLen;
	tickTimeLenFrac = timeLenFrac;
}
void lockAudio(void)
{
	if (dev != 0)
		SDL_LockAudioDevice(dev);
	audio.locked = true;
	audio.resetSyncTickTimeFlag = true;
	resetChSyncQueue();
}
void unlockAudio(void)
{
	if (dev != 0)
		SDL_UnlockAudioDevice(dev);
	audio.resetSyncTickTimeFlag = true;
	resetChSyncQueue();
	audio.locked = false;
}
void mixerUpdateLoops(void) // updates Paula loop (+ scopes)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	for (int32_t i = 0; i < AMIGA_VOICES; i++)
	{
		const moduleChannel_t *ch = &song->channels[i];
		if (ch->n_samplenum == editor.currSample)
		{
			const moduleSample_t *s = &song->samples[editor.currSample];
			paulaSetData(i, ch->n_start + s->loopStart);
			paulaSetLength(i, (uint16_t)(s->loopLength >> 1));
		}
	}
	if (audioWasntLocked)
		unlockAudio();
}
void mixerKillVoice(int32_t ch)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	memset(&paula[ch], 0, sizeof (paulaVoice_t));
	memset(&blep[ch], 0, sizeof (blep_t));
	stopScope(ch); // it should be safe to clear the scope now
	memset(&scope[ch], 0, sizeof (scope_t));
	if (audioWasntLocked)
		unlockAudio();
}
void turnOffVoices(void)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	for (int32_t i = 0; i < AMIGA_VOICES; i++)
		mixerKillVoice(i);
	clearRCFilterState(&filterLoA500);
	clearRCFilterState(&filterLoA1200);
	clearRCFilterState(&filterHiA500);
	clearRCFilterState(&filterHiA1200);
	clearLEDFilterState(&filterLED);
	resetAudioDithering();
	editor.tuningFlag = false;
	if (audioWasntLocked)
		unlockAudio();
}
void resetCachedMixerPeriod(void)
{
	paulaVoice_t *v = paula;
	for (int32_t i = 0; i < AMIGA_VOICES; i++, v++)
	{
		v->oldPeriod = -1;
		v->dOldVoiceDelta = 0.0;
		v->dOldVoiceDeltaMul = 1.0;
	}
}
// the following routines are only called from the mixer thread.
void paulaSetPeriod(int32_t ch, uint16_t period)
{
	double dPeriodToDeltaDiv;
	paulaVoice_t *v = &paula[ch];
	int32_t realPeriod = period;
	if (realPeriod == 0)
		realPeriod = 1+65535; // confirmed behavior on real Amiga
	else if (realPeriod < 113)
		realPeriod = 113; // close to what happens on real Amiga (and needed for BLEP synthesis)
	if (editor.songPlaying)
	{
		v->syncPeriod = realPeriod;
		v->syncFlags |= SET_SCOPE_PERIOD;
	}
	else
	{
		scopeSetPeriod(ch, realPeriod);
	}
	// if the new period was the same as the previous period, use cached delta
	if (realPeriod != v->oldPeriod)
	{
		v->oldPeriod = realPeriod;
		// this period is not cached, calculate mixer deltas
		// during PAT2SMP, use different audio output rates
		if (editor.isSMPRendering)
			dPeriodToDeltaDiv = editor.pat2SmpHQ ? (PAULA_PAL_CLK / PAT2SMP_HI_FREQ) : (PAULA_PAL_CLK / PAT2SMP_LO_FREQ);
		else
			dPeriodToDeltaDiv = audio.dPeriodToDeltaDiv;
		v->dOldVoiceDelta = dPeriodToDeltaDiv / realPeriod;
		if (audio.oversamplingFlag || editor.isSMPRendering)
			v->dOldVoiceDelta *= 0.5; // /2 since we do 2x oversampling
		// for BLEP synthesis (prevents division in inner mix loop)
		v->dOldVoiceDeltaMul = 1.0 / v->dOldVoiceDelta;
	}
	v->AUD_PER_delta = v->dOldVoiceDelta;
	
	// set BLEP stuff
	v->dDeltaMul = v->dOldVoiceDeltaMul;
	if (v->dLastDelta == 0.0)
		v->dLastDelta = v->AUD_PER_delta;
}
void paulaSetVolume(int32_t ch, uint16_t vol)
{
	paulaVoice_t *v = &paula[ch];
	int32_t realVol = vol;
	// this is what WinUAE does
	realVol &= 127;
	if (realVol > 64)
		realVol = 64;
	// ------------------------
	// multiplying by this also scales the sample from -128..127 -> -1.0 .. ~0.99
	v->AUD_VOL = realVol * (1.0 / (128.0 * 64.0));
	if (editor.songPlaying)
	{
		v->syncVolume = (uint8_t)realVol;
		v->syncFlags |= SET_SCOPE_VOLUME;
	}
	else
	{
		scope[ch].volume = (uint8_t)realVol;
	}
}
void paulaSetLength(int32_t ch, uint16_t len)
{
	paulaVoice_t *v = &paula[ch];
	v->AUD_LEN = len;
	if (editor.songPlaying)
		v->syncFlags |= SET_SCOPE_LENGTH;
	else
		scope[ch].newLength = len*2;
}
void paulaSetData(int32_t ch, const int8_t *src)
{
	paulaVoice_t *v = &paula[ch];
	if (src == NULL)
		src = &song->sampleData[config.reservedSampleOffset]; // 128K reserved sample
	v->AUD_LC = src;
	if (editor.songPlaying)
		v->syncFlags |= SET_SCOPE_DATA;
	else
		scope[ch].newData = src;
}
void paulaStopDMA(int32_t ch)
{
	paulaVoice_t *v = &paula[ch];
	v->DMA_active = false;
	if (editor.songPlaying)
		v->syncFlags |= STOP_SCOPE;
	else
		scope[ch].active = false;
}
void paulaStartDMA(int32_t ch)
{
	paulaVoice_t *v = &paula[ch];
	if (v->AUD_LC == NULL)
		v->AUD_LC = &song->sampleData[config.reservedSampleOffset]; // 128K reserved sample
	/* This is not really accurate to what happens on Paula
	** during DMA start, but it's good enough.
	*/
	v->dDelta = v->AUD_PER_delta;
	v->location = v->AUD_LC;
	v->lengthCounter = v->AUD_LEN;
	// pre-fill AUDxDAT buffer
	v->AUD_DAT[0] = *v->location++;
	v->AUD_DAT[1] = *v->location++;
	v->sampleCounter = 2;
	// set current sample point
	v->dSample = v->AUD_DAT[0] * v->AUD_VOL;
	// progress AUD_DAT buffer
	v->AUD_DAT[0] = v->AUD_DAT[1];
	v->sampleCounter--;
	// set BLEP stuff
	v->dLastPhase = 0.0;
	v->dLastDelta = v->dDelta;
	v->dBlepOffset = 0.0;
	v->dPhase = 0.0;
	v->DMA_active = true;
	if (editor.songPlaying)
	{
		v->syncTriggerData = v->AUD_LC;
		v->syncTriggerLength = v->AUD_LEN * 2;
		v->syncFlags |= TRIGGER_SCOPE;
	}
	else
	{
		scope_t *s = &scope[ch];
		s->newData = v->AUD_LC;
		s->newLength = v->AUD_LEN * 2;
		scopeTrigger(ch);
	}
}
void toggleFilterModel(void)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	clearRCFilterState(&filterLoA500);
	clearRCFilterState(&filterLoA1200);
	clearRCFilterState(&filterHiA500);
	clearRCFilterState(&filterHiA1200);
	clearLEDFilterState(&filterLED);
	filterModel ^= 1;
	updateFilterFunc();
	if (filterModel == FILTERMODEL_A500)
		displayMsg("AUDIO: AMIGA 500");
	else
		displayMsg("AUDIO: AMIGA 1200");
	if (audioWasntLocked)
		unlockAudio();
}
void mixChannels(int32_t numSamples)
{
	double *dMixBufSelect[AMIGA_VOICES] = { dMixBufferL, dMixBufferR, dMixBufferR, dMixBufferL };
	memset(dMixBufferL, 0, numSamples * sizeof (double));
	memset(dMixBufferR, 0, numSamples * sizeof (double));
	paulaVoice_t *v = paula;
	blep_t *bSmp = blep;
	for (int32_t i = 0; i < AMIGA_VOICES; i++, v++, bSmp++)
	{
		/* We only need to test for a NULL-pointer once.
		** When pointers are messed with in the tracker, the mixer
		** is temporarily forced offline, and its voice pointers are
		** cleared to prevent expired pointer addresses.
		*/
		if (!v->DMA_active || v->location == NULL || v->AUD_LC == NULL)
			continue;
		double *dMixBuf = dMixBufSelect[i]; // what output channel to mix into (L, R, R, L)
		for (int32_t j = 0; j < numSamples; j++)
		{
			double dSmp = v->dSample;
			if (dSmp != bSmp->dLastValue)
			{
				if (v->dLastDelta > v->dLastPhase)
					blepAdd(bSmp, v->dBlepOffset, bSmp->dLastValue - dSmp);
				bSmp->dLastValue = dSmp;
			}
			if (bSmp->samplesLeft > 0)
				dSmp = blepRun(bSmp, dSmp);
			dMixBuf[j] += dSmp;
			v->dPhase += v->dDelta;
			if (v->dPhase >= 1.0) // deltas can't be >= 1.0, so this is safe
			{
				v->dPhase -= 1.0;
				// Paula only updates period (delta) during period refetching (this stage)
				v->dDelta = v->AUD_PER_delta;
				if (v->sampleCounter == 0)
				{
					// it's time to read new samples from DMA
					if (--v->lengthCounter == 0)
					{
						v->lengthCounter = v->AUD_LEN;
						v->location = v->AUD_LC;
					}
					// fill DMA data buffer
					v->AUD_DAT[0] = *v->location++;
					v->AUD_DAT[1] = *v->location++;
					v->sampleCounter = 2;
				}
				/* Pre-compute current sample point.
				** Output volume is only read from AUDxVOL at this stage,
				** and we don't emulate volume PWM anyway, so we can
				** pre-multiply by volume at this point.
				*/
				v->dSample = v->AUD_DAT[0] * v->AUD_VOL; // -128..127 * 0.0 .. 1.0
				// progress AUD_DAT buffer
				v->AUD_DAT[0] = v->AUD_DAT[1];
				v->sampleCounter--;
				// setup BLEP stuff
				v->dBlepOffset = v->dPhase * v->dDeltaMul;
				v->dLastPhase = v->dPhase;
				v->dLastDelta = v->dDelta;
			}
		}
	}
}
void resetAudioDithering(void)
{
	randSeed = INITIAL_DITHER_SEED;
	dPrngStateL = 0.0;
	dPrngStateR = 0.0;
}
static inline int32_t random32(void)
{
	// LCG random 32-bit generator (quite good and fast)
	randSeed *= 134775813;
	randSeed++;
	return randSeed;
}
static void processFiltersA1200_NoLED(int32_t numSamples)
{
	if (useA1200LowPassFilter)
	{
		for (int32_t i = 0; i < numSamples; i++)
		{
			double dOut[2];
			dOut[0] = dMixBufferL[i];
			dOut[1] = dMixBufferR[i];
			// low-pass filter
			RCLowPassFilterStereo(&filterLoA1200, dOut, dOut);
			// high-pass RC filter
			RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
			dMixBufferL[i] = dOut[0];
			dMixBufferR[i] = dOut[1];
		}
	}
	else
	{
		for (int32_t i = 0; i < numSamples; i++)
		{
			double dOut[2];
			dOut[0] = dMixBufferL[i];
			dOut[1] = dMixBufferR[i];
			// high-pass RC filter
			RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
			dMixBufferL[i] = dOut[0];
			dMixBufferR[i] = dOut[1];
		}
	}
}
static void processFiltersA1200_LED(int32_t numSamples)
{
	if (useA1200LowPassFilter)
	{
		for (int32_t i = 0; i < numSamples; i++)
		{
			double dOut[2];
			dOut[0] = dMixBufferL[i];
			dOut[1] = dMixBufferR[i];
			// low-pass filter
			RCLowPassFilterStereo(&filterLoA1200, dOut, dOut);
			// "LED" Sallen-Key filter
			LEDFilter(&filterLED, dOut, dOut);
			// high-pass RC filter
			RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
			dMixBufferL[i] = dOut[0];
			dMixBufferR[i] = dOut[1];
		}
	}
	else
	{
		for (int32_t i = 0; i < numSamples; i++)
		{
			double dOut[2];
			dOut[0] = dMixBufferL[i];
			dOut[1] = dMixBufferR[i];
			// "LED" Sallen-Key filter
			LEDFilter(&filterLED, dOut, dOut);
			// high-pass RC filter
			RCHighPassFilterStereo(&filterHiA1200, dOut, dOut);
			dMixBufferL[i] = dOut[0];
			dMixBufferR[i] = dOut[1];
		}
	}
}
static void processFiltersA500_NoLED(int32_t numSamples)
{
	for (int32_t i = 0; i < numSamples; i++)
	{
		double dOut[2];
		dOut[0] = dMixBufferL[i];
		dOut[1] = dMixBufferR[i];
		// low-pass RC filter
		RCLowPassFilterStereo(&filterLoA500, dOut, dOut);
		// high-pass RC filter
		RCHighPassFilterStereo(&filterHiA500, dOut, dOut);
		dMixBufferL[i] = dOut[0];
		dMixBufferR[i] = dOut[1];
	}
}
static void processFiltersA500_LED(int32_t numSamples)
{
	for (int32_t i = 0; i < numSamples; i++)
	{
		double dOut[2];
		dOut[0] = dMixBufferL[i];
		dOut[1] = dMixBufferR[i];
		// low-pass RC filter
		RCLowPassFilterStereo(&filterLoA500, dOut, dOut);
		// "LED" Sallen-Key filter
		LEDFilter(&filterLED, dOut, dOut);
		// high-pass RC filter
		RCHighPassFilterStereo(&filterHiA500, dOut, dOut);
		dMixBufferL[i] = dOut[0];
		dMixBufferR[i] = dOut[1];
	}
}
#define NORM_FACTOR 2.0 /* nominally correct, but can clip from high-pass filter overshoot */
static inline void processMixedSamplesAmigaPanning(int32_t i, int16_t *out)
{
	int32_t smp32;
	double dPrng;
	double dL = dMixBufferL[i];
	double dR = dMixBufferR[i];
	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)
	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	// left channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dL = (dL + dPrng) - dPrngStateL;
	dPrngStateL = dPrng;
	smp32 = (int32_t)dL;
	CLAMP16(smp32);
	out[0] = (int16_t)smp32;
	// right channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dR = (dR + dPrng) - dPrngStateR;
	dPrngStateR = dPrng;
	smp32 = (int32_t)dR;
	CLAMP16(smp32);
	out[1] = (int16_t)smp32;
}
static inline void processMixedSamples(int32_t i, int16_t *out)
{
	int32_t smp32;
	double dPrng;
	double dL = dMixBufferL[i];
	double dR = dMixBufferR[i];
	// apply stereo separation
	const double dOldL = dL;
	const double dOldR = dR;
	double dMid  = (dOldL + dOldR) * STEREO_NORM_FACTOR;
	double dSide = (dOldL - dOldR) * dSideFactor;
	dL = dMid + dSide;
	dR = dMid - dSide;
	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)
	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	// left channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dL = (dL + dPrng) - dPrngStateL;
	dPrngStateL = dPrng;
	smp32 = (int32_t)dL;
	CLAMP16(smp32);
	out[0] = (int16_t)smp32;
	// right channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dR = (dR + dPrng) - dPrngStateR;
	dPrngStateR = dPrng;
	smp32 = (int32_t)dR;
	CLAMP16(smp32);
	out[1] = (int16_t)smp32;
}
static inline void processMixedSamplesAmigaPanning_2x(int32_t i, int16_t *out) // 2x oversampling
{
	int32_t smp32;
	double dPrng, dL, dR;
	// 2x downsampling (decimation)
	const uint32_t offset1 = (i << 1) + 0;
	const uint32_t offset2 = (i << 1) + 1;
	dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);
	dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);
	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)
	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	// left channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dL = (dL + dPrng) - dPrngStateL;
	dPrngStateL = dPrng;
	smp32 = (int32_t)dL;
	CLAMP16(smp32);
	out[0] = (int16_t)smp32;
	// right channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dR = (dR + dPrng) - dPrngStateR;
	dPrngStateR = dPrng;
	smp32 = (int32_t)dR;
	CLAMP16(smp32);
	out[1] = (int16_t)smp32;
}
static inline void processMixedSamples_2x(int32_t i, int16_t *out) // 2x oversampling
{
	int32_t smp32;
	double dPrng, dL, dR;
	// 2x downsampling (decimation)
	const uint32_t offset1 = (i << 1) + 0;
	const uint32_t offset2 = (i << 1) + 1;
	dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);
	dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);
	// apply stereo separation
	const double dOldL = dL;
	const double dOldR = dR;
	double dMid  = (dOldL + dOldR) * STEREO_NORM_FACTOR;
	double dSide = (dOldL - dOldR) * dSideFactor;
	dL = dMid + dSide;
	dR = dMid - dSide;
	// normalize w/ phase-inversion (A500/A1200 has a phase-inverted audio signal)
	dL *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	dR *= NORM_FACTOR * (-INT16_MAX / (double)AMIGA_VOICES);
	// left channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dL = (dL + dPrng) - dPrngStateL;
	dPrngStateL = dPrng;
	smp32 = (int32_t)dL;
	CLAMP16(smp32);
	out[0] = (int16_t)smp32;
	// right channel - 1-bit triangular dithering (high-pass filtered)
	dPrng = random32() * (0.5 / INT32_MAX); // -0.5..0.5
	dR = (dR + dPrng) - dPrngStateR;
	dPrngStateR = dPrng;
	smp32 = (int32_t)dR;
	CLAMP16(smp32);
	out[1] = (int16_t)smp32;
}
void outputAudio(int16_t *target, int32_t numSamples)
{
	if (editor.isSMPRendering)
	{
		// render to sample (PAT2SMP)
		int32_t samplesTodo = numSamples;
		if (editor.pat2SmpPos+samplesTodo > config.maxSampleLength)
			samplesTodo = config.maxSampleLength-editor.pat2SmpPos;
		// mix channels (with 2x oversampling, PAT2SMP needs it)
		mixChannels(samplesTodo*2);
		double *dOutStream = &editor.dPat2SmpBuf[editor.pat2SmpPos];
		for (int32_t i = 0; i < samplesTodo; i++)
		{
			// 2x downsampling (decimation)
			double dL, dR;
			const uint32_t offset1 = (i << 1) + 0;
			const uint32_t offset2 = (i << 1) + 1;
			dL = decimate2x_L(dMixBufferL[offset1], dMixBufferL[offset2]);
			dR = decimate2x_R(dMixBufferR[offset1], dMixBufferR[offset2]);
			dOutStream[i] = (dL + dR) * 0.5; // normalized to -128..127 later
		}
		editor.pat2SmpPos += samplesTodo;
		if (editor.pat2SmpPos >= config.maxSampleLength)
		{
			editor.smpRenderingDone = true;
			updateWindowTitle(MOD_IS_MODIFIED);
		}
	}
	else
	{
		if (audio.oversamplingFlag) // 2x oversampling
		{
			// mix and filter channels (at 2x rate)
			mixChannels(numSamples*2);
			processFiltersFunc(numSamples*2);
			// downsample, normalize and dither
			int16_t out[2];
			int16_t *outStream = target;
			if (stereoSeparation == 100)
			{
				for (int32_t i = 0; i < numSamples; i++)
				{
					processMixedSamplesAmigaPanning_2x(i, out);
					*outStream++ = out[0];
					*outStream++ = out[1];
				}
			}
			else
			{
				for (int32_t i = 0; i < numSamples; i++)
				{
					processMixedSamples_2x(i, out);
					*outStream++ = out[0];
					*outStream++ = out[1];
				}
			}
		}
		else
		{
			// mix and filter channels
			mixChannels(numSamples);
			processFiltersFunc(numSamples);
			// normalize and dither
			int16_t out[2];
			int16_t *outStream = target;
			if (stereoSeparation == 100)
			{
				for (int32_t i = 0; i < numSamples; i++)
				{
					processMixedSamplesAmigaPanning(i, out);
					*outStream++ = out[0];
					*outStream++ = out[1];
				}
			}
			else
			{
				for (int32_t i = 0; i < numSamples; i++)
				{
					processMixedSamples(i, out);
					*outStream++ = out[0];
					*outStream++ = out[1];
				}
			}
		}
	}
}
static void fillVisualsSyncBuffer(void)
{
	chSyncData_t chSyncData;
	if (audio.resetSyncTickTimeFlag)
	{
		audio.resetSyncTickTimeFlag = false;
		tickTime64 = SDL_GetPerformanceCounter() + audLatencyPerfValInt;
		tickTime64Frac = audLatencyPerfValFrac;
	}
	moduleChannel_t *c = song->channels;
	paulaVoice_t *v = paula;
	syncedChannel_t *s = chSyncData.channels;
	for (int32_t i = 0; i < AMIGA_VOICES; i++, c++, s++, v++)
	{
		s->flags = v->syncFlags | c->syncFlags;
		c->syncFlags = v->syncFlags = 0; // clear sync flags
		s->volume = v->syncVolume;
		s->period = v->syncPeriod;
		s->triggerData = v->syncTriggerData;
		s->triggerLength = v->syncTriggerLength;
		s->newData = v->AUD_LC;
		s->newLength = v->AUD_LEN * 2;
		s->vuVolume = c->syncVuVolume;
		s->analyzerVolume = c->syncAnalyzerVolume;
		s->analyzerPeriod = c->syncAnalyzerPeriod;
	}
	chSyncData.timestamp = tickTime64;
	chQueuePush(chSyncData);
	tickTime64 += tickTimeLen;
	tickTime64Frac += tickTimeLenFrac;
	if (tickTime64Frac > 0xFFFFFFFF)
	{
		tickTime64Frac &= 0xFFFFFFFF;
		tickTime64++;
	}
}
static void SDLCALL audioCallback(void *userdata, Uint8 *stream, int len)
{
	if (audio.forceSoundCardSilence) // during MOD2WAV
	{
		memset(stream, 0, len);
		return;
	}
	int16_t *streamOut = (int16_t *)stream;
	int32_t samplesLeft = len >> 2;
	while (samplesLeft > 0)
	{
		if (audio.tickSampleCounter64 <= 0)
		{
			// new replayer tick
			if (editor.songPlaying)
			{
				intMusic();
				fillVisualsSyncBuffer();
			}
			audio.tickSampleCounter64 += audio.samplesPerTick64;
		}
		const int32_t remainingTick = (audio.tickSampleCounter64 + UINT32_MAX) >> 32; // ceil rounding (upwards)
		int32_t samplesToMix = samplesLeft;
		if (samplesToMix > remainingTick)
			samplesToMix = remainingTick;
		outputAudio(streamOut, samplesToMix);
		streamOut += samplesToMix<<1;
		samplesLeft -= samplesToMix;
		audio.tickSampleCounter64 -= (int64_t)samplesToMix << 32;
	}
	(void)userdata;
}
static void calculateFilterCoeffs(void)
{
	/* Amiga 500/1200 filter emulation
	**
	** aciddose:
	** First comes a static low-pass 6dB formed by the supply current
	** from the Paula's mixture of channels A+B / C+D into the opamp with
	** 0.1uF capacitor and 360 ohm resistor feedback in inverting mode biased by
	** dac vRef (used to center the output).
	**
	** R = 360 ohm
	** C = 0.1uF
	** Low Hz = 4420.97~ = 1 / (2pi * 360 * 0.0000001)
	**
	** Under spice simulation the circuit yields -3dB = 4400Hz.
	** In the Amiga 1200, the low-pass cutoff is ~34kHz, so the
	** static low-pass filter is disabled in the mixer in A1200 mode.
	**
	** Next comes a bog-standard Sallen-Key filter ("LED") with:
	** R1 = 10K ohm
	** R2 = 10K ohm
	** C1 = 6800pF
	** C2 = 3900pF
	** Q ~= 1/sqrt(2)
	**
	** This filter is optionally bypassed by an MPF-102 JFET chip when
	** the LED filter is turned off.
	**
	** Under spice simulation the circuit yields -3dB = 2800Hz.
	** 90 degrees phase = 3000Hz (so, should oscillate at 3kHz!)
	**
	** The buffered output of the Sallen-Key passes into an RC high-pass with:
	** R = 1.39K ohm (1K ohm + 390 ohm)
	** C = 22uF (also C = 330nF, for improved high-frequency)
	**
	** High Hz = 5.2~ = 1 / (2pi * 1390 * 0.000022)
	** Under spice simulation the circuit yields -3dB = 5.2Hz.
	**
	** 8bitbubsy:
	** Keep in mind that many of the Amiga schematics that are floating around on
	** the internet have wrong RC values! They were most likely very early schematics
	** that didn't change before production (or changes that never reached production).
	** This has been confirmed by measuring the components on several Amiga motherboards.
	**
	** Correct values for A500, >rev3 (?) (A500_R6.pdf):
	** - 1-pole RC 6dB/oct low-pass: R=360 ohm, C=0.1uF
	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF
	** - 1-pole RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22.33uF (22+0.33)
	**
	** Correct values for A1200, all revs (A1200_R2.pdf):
	** - 1-pole RC 6dB/oct low-pass: R=680 ohm, C=6800pF
	** - Sallen-key low-pass ("LED"): R1/R2=10k ohm, C1=6800pF, C2=3900pF (same as A500)
	** - 1-pole RC 6dB/oct high-pass: R=1390 ohm (1000+390), C=22uF
	*/
	double dAudioFreq = audio.outputRate;
	double R, C, R1, R2, C1, C2, fc, fb;
	if (audio.oversamplingFlag)
		dAudioFreq *= 2.0; // 2x oversampling
	// A500 1-pole (6db/oct) static RC low-pass filter:
	R = 360.0; // R321 (360 ohm)
	C = 1e-7;  // C321 (0.1uF)
	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~4420.97Hz
	calcRCFilterCoeffs(dAudioFreq, fc, &filterLoA500);
	// A1200 1-pole (6db/oct) static RC low-pass filter:
	R = 680.0;  // R321 (680 ohm)
	C = 6.8e-9; // C321 (6800pF)
	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~34419.32Hz
	useA1200LowPassFilter = false;
	if (dAudioFreq/2.0 > fc)
	{
		calcRCFilterCoeffs(dAudioFreq, fc, &filterLoA1200);
		useA1200LowPassFilter = true;
	}
	// Sallen-Key filter ("LED" filter, same values on A500/A1200):
	R1 = 10000.0; // R322 (10K ohm)
	R2 = 10000.0; // R323 (10K ohm)
	C1 = 6.8e-9;  // C322 (6800pF)
	C2 = 3.9e-9;  // C323 (3900pF)
	fc = 1.0 / (PT2_TWO_PI * pt2_sqrt(R1 * R2 * C1 * C2)); // cutoff = ~3090.53Hz
	fb = 0.125/2.0; // Fb = 0.125 : Q ~= 1/sqrt(2) (Butterworth) (8bb: was 0.125, but /2 gives a closer gain!)
	calcLEDFilterCoeffs(dAudioFreq, fc, fb, &filterLED);
	// A500 1-pole (6dB/oct) static RC high-pass filter:
	R = 1390.0;   // R324 (1K ohm) + R325 (390 ohm)
	C = 2.233e-5; // C334 (22uF) + C335 (0.33uF)
	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~5.13Hz
	calcRCFilterCoeffs(dAudioFreq, fc, &filterHiA500);
	// A1200 1-pole (6dB/oct) static RC high-pass filter:
	R = 1390.0; // R324 (1K ohm resistor) + R325 (390 ohm resistor)
	C = 2.2e-5; // C334 (22uF capacitor)
	fc = 1.0 / (PT2_TWO_PI * R * C); // cutoff = ~5.20Hz
	calcRCFilterCoeffs(dAudioFreq, fc, &filterHiA1200);
}
void mixerSetStereoSeparation(uint8_t percentage) // 0..100 (percentage)
{
	assert(percentage <= 100);
	stereoSeparation = percentage;
	dSideFactor = (percentage / 100.0) * STEREO_NORM_FACTOR;
}
static double ciaBpm2Hz(int32_t bpm)
{
	if (bpm == 0)
		return 0.0;
	const uint32_t ciaPeriod = 1773447 / bpm; // yes, PT truncates here
	return (double)CIA_PAL_CLK / (ciaPeriod+1); // +1, CIA triggers on underflow
}
static void generateBpmTables(bool vblankTimingFlag)
{
	for (int32_t bpm = 32; bpm <= 255; bpm++)
	{
		double dBpmHz;
		
		if (vblankTimingFlag)
			dBpmHz = AMIGA_PAL_VBLANK_HZ;
		else
			dBpmHz = ciaBpm2Hz(bpm);
		const double dSamplesPerTick      = audio.outputRate / dBpmHz;
		const double dSamplesPerTick28kHz = PAT2SMP_HI_FREQ  / dBpmHz; // PAT2SMP hi quality
		const double dSamplesPerTick20kHz = PAT2SMP_LO_FREQ  / dBpmHz; // PAT2SMP low quality
		// convert to rounded 32.32 fixed-point
		const int32_t i = bpm - 32;
		audio.bpmTable[i]      = (int64_t)((dSamplesPerTick      * (UINT32_MAX+1.0)) + 0.5);
		audio.bpmTable28kHz[i] = (int64_t)((dSamplesPerTick28kHz * (UINT32_MAX+1.0)) + 0.5);
		audio.bpmTable20kHz[i] = (int64_t)((dSamplesPerTick20kHz * (UINT32_MAX+1.0)) + 0.5);
	}
}
static void generateTickLengthTable(bool vblankTimingFlag)
{
	for (int32_t bpm = 32; bpm <= 255; bpm++)
	{
		double dHz;
		if (vblankTimingFlag)
			dHz = AMIGA_PAL_VBLANK_HZ;
		else
			dHz = ciaBpm2Hz(bpm);
		// BPM -> Hz -> tick length for performance counter (syncing visuals to audio)
		double dTimeInt;
		double dTimeFrac = modf(hpcFreq.dFreq / dHz, &dTimeInt);
		const int32_t timeInt = (int32_t)dTimeInt;
	
		dTimeFrac = floor((dTimeFrac * (UINT32_MAX+1.0)) + 0.5); // fractional part (scaled to 0..2^32-1)
		audio.tickLengthTable[bpm-32] = ((uint64_t)timeInt << 32) | (uint32_t)dTimeFrac;
	}
}
void updateReplayerTimingMode(void)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	const bool vblankTimingMode = (editor.timingMode == TEMPO_MODE_VBLANK);
	generateBpmTables(vblankTimingMode);
	generateTickLengthTable(vblankTimingMode);
	if (audioWasntLocked)
		unlockAudio();
}
bool setupAudio(void)
{
	SDL_AudioSpec want, have;
	want.freq = config.soundFrequency;
	want.samples = (uint16_t)config.soundBufferSize;
	want.format = AUDIO_S16;
	want.channels = 2;
	want.callback = audioCallback;
	want.userdata = NULL;
	dev = SDL_OpenAudioDevice(NULL, 0, &want, &have, 0);
	if (dev == 0)
	{
		showErrorMsgBox("Unable to open audio device: %s", SDL_GetError());
		return false;
	}
	// lower than this is not safe for the BLEP synthesis in the mixer
	const int32_t minFreq = (int32_t)(PAULA_PAL_CLK / 113.0 / 2.0)+1; // /2 because we do 2x oversampling
	if (have.freq < minFreq)
	{
		showErrorMsgBox("Unable to open audio: An audio rate below %dHz can't be used!", minFreq);
		return false;
	}
	if (have.format != want.format)
	{
		showErrorMsgBox("Unable to open audio: The sample format (signed 16-bit) couldn't be used!");
		return false;
	}
	audio.outputRate = have.freq;
	audio.audioBufferSize = have.samples;
	audio.dPeriodToDeltaDiv = (double)PAULA_PAL_CLK / audio.outputRate;
	// we do 2x oversampling if the audio output rate is below 96kHz
	audio.oversamplingFlag = (audio.outputRate < 96000);
	updateReplayerTimingMode(); // also generates the BPM tables used below
	const int32_t lowestBPM = 32;
	const int32_t pat2SmpMaxSamples = (audio.bpmTable20kHz[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)
	const int32_t renderMaxSamples = (audio.bpmTable[lowestBPM-32] + (1LL + 31)) >> 32; // ceil (rounded upwards)
	const int32_t maxSamplesToMix = MAX(pat2SmpMaxSamples, renderMaxSamples) * 2; // *2 because PAT2SMP uses 2x oversampling all the time
	dMixBufferLUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double), 256);
	dMixBufferRUnaligned = (double *)MALLOC_PAD(maxSamplesToMix * sizeof (double), 256);
	if (dMixBufferLUnaligned == NULL || dMixBufferRUnaligned == NULL)
	{
		showErrorMsgBox("Out of memory!");
		return false;
	}
	dMixBufferL = (double *)ALIGN_PTR(dMixBufferLUnaligned, 256);
	dMixBufferR = (double *)ALIGN_PTR(dMixBufferRUnaligned, 256);
	mixerSetStereoSeparation(config.stereoSeparation);
	filterModel = config.filterModel;
	ledFilterEnabled = false;
	calculateFilterCoeffs();
	audio.samplesPerTick64 = audio.bpmTable[125-32]; // BPM 125
	audio.tickSampleCounter64 = 0; // zero tick sample counter so that it will instantly initiate a tick
	calcAudioLatencyVars(audio.audioBufferSize, audio.outputRate);
	resetCachedMixerPeriod();
	clearMixerDownsamplerStates();
	audio.resetSyncTickTimeFlag = true;
	updateFilterFunc();
	SDL_PauseAudioDevice(dev, false);
	return true;
}
void audioClose(void)
{
	if (dev > 0)
	{
		SDL_PauseAudioDevice(dev, true);
		SDL_CloseAudioDevice(dev);
		dev = 0;
	}
	if (dMixBufferLUnaligned != NULL)
	{
		free(dMixBufferLUnaligned);
		dMixBufferLUnaligned = NULL;
	}
	if (dMixBufferRUnaligned != NULL)
	{
		free(dMixBufferRUnaligned);
		dMixBufferRUnaligned = NULL;
	}
}
void toggleAmigaPanMode(void)
{
	const bool audioWasntLocked = !audio.locked;
	if (audioWasntLocked)
		lockAudio();
	amigaPanFlag ^= 1;
	if (!amigaPanFlag)
	{
		mixerSetStereoSeparation(config.stereoSeparation);
		displayMsg("AMIGA PANNING OFF");
	}
	else
	{
		mixerSetStereoSeparation(100);
		displayMsg("AMIGA PANNING ON");
	}
	if (audioWasntLocked)
		unlockAudio();
}
uint16_t get16BitPeak(int16_t *sampleData, uint32_t sampleLength)
{
	uint16_t samplePeak = 0;
	for (uint32_t i = 0; i < sampleLength; i++)
	{
		uint16_t sample = ABS(sampleData[i]);
		if (samplePeak < sample)
			samplePeak = sample;
	}
	return samplePeak;
}
uint32_t get32BitPeak(int32_t *sampleData, uint32_t sampleLength)
{
	uint32_t samplePeak = 0;
	for (uint32_t i = 0; i < sampleLength; i++)
	{
		uint32_t sample = ABS(sampleData[i]);
		if (samplePeak < sample)
			samplePeak = sample;
	}
	return samplePeak;
}
float getFloatPeak(float *fSampleData, uint32_t sampleLength)
{
	float fSamplePeak = 0.0f;
	for (uint32_t i = 0; i < sampleLength; i++)
	{
		const float fSample = fabsf(fSampleData[i]);
		if (fSamplePeak < fSample)
			fSamplePeak = fSample;
	}
	return fSamplePeak;
}
double getDoublePeak(double *dSampleData, uint32_t sampleLength)
{
	double dSamplePeak = 0.0;
	for (uint32_t i = 0; i < sampleLength; i++)
	{
		const double dSample = fabs(dSampleData[i]);
		if (dSamplePeak < dSample)
			dSamplePeak = dSample;
	}
	return dSamplePeak;
}
void normalize16BitTo8Bit(int16_t *sampleData, uint32_t sampleLength)
{
	const uint16_t samplePeak = get16BitPeak(sampleData, sampleLength);
	if (samplePeak == 0 || samplePeak >= INT16_MAX)
		return;
	const double dGain = (double)INT16_MAX / samplePeak;
	for (uint32_t i = 0; i < sampleLength; i++)
	{
		const int32_t sample = (const int32_t)(sampleData[i] * dGain);
		sampleData[i] = (int16_t)sample;
	}
}
void normalize32BitTo8Bit(int32_t *sampleData, uint32_t sampleLength)
{
	const uint32_t samplePeak = get32BitPeak(sampleData, sampleLength);
	if (samplePeak == 0 || samplePeak >= INT32_MAX)
		return;
	const double dGain = (double)INT32_MAX / samplePeak;
	for (uint32_t i = 0; i < sampleLength; i++)
	{
		const int32_t sample = (const int32_t)(sampleData[i] * dGain);
		sampleData[i] = (int32_t)sample;
	}
}
void normalizeFloatTo8Bit(float *fSampleData, uint32_t sampleLength)
{
	const float fSamplePeak = getFloatPeak(fSampleData, sampleLength);
	if (fSamplePeak <= 0.0f)
		return;
	const float fGain = INT8_MAX / fSamplePeak;
	for (uint32_t i = 0; i < sampleLength; i++)
		fSampleData[i] *= fGain;
}
void normalizeDoubleTo8Bit(double *dSampleData, uint32_t sampleLength)
{
	const double dSamplePeak = getDoublePeak(dSampleData, sampleLength);
	if (dSamplePeak <= 0.0)
		return;
	const double dGain = INT8_MAX / dSamplePeak;
	for (uint32_t i = 0; i < sampleLength; i++)
		dSampleData[i] *= dGain;
}