ref: 48da1fbedb5db4c173875542774f13b93b7b44a1
parent: 584ded059ff4b7c88d4950fae0fe675c04a090c1
author: Olav Sørensen <olav.sorensen@live.no>
date: Sun Feb 27 09:16:52 EST 2022
Code refactoring: Use new ft2_hpc.c file for HPC timers
--- a/src/ft2_events.c
+++ b/src/ft2_events.c
@@ -48,43 +48,42 @@
static HWND hWnd;
static HANDLE oneInstHandle, hMapFile;
static LPCTSTR sharedMemBuf;
-
-// used for Windows usleep() implementation
-static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval);
#endif
static void handleInput(void);
-// usleep() implementation for Windows (Warning: This might not be future-safe!)
-#ifdef _WIN32
-void usleep(uint32_t usec)
+#ifdef _WIN32 // Windows usleep() implementation
+
+static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval); // ntdll.dll entry points, looked up at runtime in windowsSetupUsleep()
+static NTSTATUS (__stdcall *NtQueryTimerResolution)(PULONG MinimumResolution, PULONG MaximumResolution, PULONG ActualResolution);
+static NTSTATUS (__stdcall *NtSetTimerResolution)(ULONG DesiredResolution, BOOLEAN SetResolution, PULONG CurrentResolution);
+
+static void (*usleep)(int32_t usec); // set by windowsSetupUsleep(): usleepGood if NtDelayExecution was found, else usleepWeak
+
+static void usleepGood(int32_t usec) // sleep for about usec microseconds using NtDelayExecution()
{
- LARGE_INTEGER lpDueTime;
+ LARGE_INTEGER delayInterval;
- if (NtDelayExecution == NULL)
- {
- // NtDelayExecution() is not available (shouldn't happen), use regular sleep()
- Sleep(usec / 1000);
- }
- else
- {
- // this prevents a 64-bit MUL (will not overflow with the ranges we use anyway)
- lpDueTime.HighPart = UINT32_MAX;
- lpDueTime.LowPart = (DWORD)(-10 * (int32_t)usec);
+ // NtDelayExecution() delays in 100ns-units, and negative value = delay from current time.
+ // The interval is built as one 64-bit value: a 32-bit "usec *= -10" overflows above ~214 seconds,
- NtDelayExecution(false, &lpDueTime);
- }
+ // and usec == 0 paired with HighPart = 0xFFFFFFFF gives -2^32 units (a ~429 second sleep), not zero.
+ delayInterval.QuadPart = -10 * (int64_t)usec;
+ NtDelayExecution(false, &delayInterval);
}
-void setupWin32Usleep(void)
+static void usleepWeak(int32_t usec) // fallback if no NtDelayExecution()
{
- NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtDelayExecution");
- timeBeginPeriod(0); // enter highest timer resolution
+ Sleep((usec + 500) / 1000); // Sleep() takes milliseconds: microseconds -> milliseconds, rounded to nearest for non-negative usec
}
-void freeWin32Usleep(void)
+void windowsSetupUsleep(void) // NOTE(review): no caller of this non-static copy is visible in this patch (hpc_Init() calls the static one in ft2_hpc.c) - confirm it is still needed
{
- timeEndPeriod(0); // exit highest timer resolution
+ NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtDelayExecution");
+ NtQueryTimerResolution = (NTSTATUS (__stdcall *)(PULONG, PULONG, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtQueryTimerResolution");
+ NtSetTimerResolution = (NTSTATUS (__stdcall *)(ULONG, BOOLEAN, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtSetTimerResolution");
+
+ usleep = (NtDelayExecution != NULL) ? usleepGood : usleepWeak; // use the NtDelayExecution()-based sleep when it was found, else the Sleep() fallback
}
#endif
@@ -402,17 +401,9 @@
else if (event->window.event == SDL_WINDOWEVENT_SHOWN)
video.windowHidden = false;
- if (video.vsync60HzPresent)
- {
- /* If we minimize the window and vsync is present, vsync is temporarily turned off.
- ** recalc waitVBL() vars so that it can sleep properly in said mode.
- */
- if (event->window.event == SDL_WINDOWEVENT_MINIMIZED ||
- event->window.event == SDL_WINDOWEVENT_FOCUS_LOST)
- {
- setupWaitVBL();
- }
- }
+ // reset vblank end time if the window is minimized or loses focus
+ if (event->window.event == SDL_WINDOWEVENT_MINIMIZED || event->window.event == SDL_WINDOWEVENT_FOCUS_LOST)
+ hpc_ResetEndTime(&video.vblankHpc);
}
}
--- a/src/ft2_events.h
+++ b/src/ft2_events.h
@@ -20,7 +20,4 @@
#ifdef _WIN32
bool handleSingleInstancing(int32_t argc, char **argv);
void closeSingleInstancing(void);
-void usleep(uint32_t usec);
-void setupWin32Usleep(void);
-void freeWin32Usleep(void);
#endif
--- /dev/null
+++ b/src/ft2_hpc.c
@@ -1,0 +1,127 @@
+/*
+** Hardware Performance Counter delay routines
+*/
+
+#ifdef _WIN32
+#define WIN32_MEAN_AND_LEAN
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+#include <SDL2/SDL.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "ft2_hpc.h"
+
+#define FRAC_BITS 53 // number of fractional bits in the duration/end-time fixed-point values
+#define FRAC_SCALE (1ULL << FRAC_BITS) // fixed-point representation of 1.0
+#define FRAC_MASK (FRAC_SCALE-1) // keeps only the fractional bits
+
+hpcFreq_t hpcFreq; // filled in by hpc_Init()
+
+#ifdef _WIN32 // Windows usleep() implementation
+
+static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval); // ntdll.dll entry points, looked up at runtime in windowsSetupUsleep()
+static NTSTATUS (__stdcall *NtQueryTimerResolution)(PULONG MinimumResolution, PULONG MaximumResolution, PULONG ActualResolution);
+static NTSTATUS (__stdcall *NtSetTimerResolution)(ULONG DesiredResolution, BOOLEAN SetResolution, PULONG CurrentResolution);
+
+static void (*usleep)(int32_t usec); // set by windowsSetupUsleep(): usleepGood if NtDelayExecution was found, else usleepWeak
+
+static void usleepGood(int32_t usec) // sleep for about usec microseconds using NtDelayExecution()
+{
+ LARGE_INTEGER delayInterval;
+
+ // NtDelayExecution() delays in 100ns-units, and negative value = delay from current time.
+ // The interval is built as one 64-bit value: the earlier 32-bit "usec *= -10" overflowed for
+ // delays above ~214 seconds, and usec == 0 paired with HighPart = 0xFFFFFFFF produced -2^32
+ // units (a ~429 second sleep) instead of no delay.
+ delayInterval.QuadPart = -10 * (int64_t)usec;
+ NtDelayExecution(false, &delayInterval);
+}
+
+static void usleepWeak(int32_t usec) // fallback if no NtDelayExecution()
+{
+ Sleep((usec + 500) / 1000); // Sleep() takes milliseconds: microseconds -> milliseconds, rounded to nearest for non-negative usec
+}
+
+static void windowsSetupUsleep(void) // looks up the ntdll.dll timing functions and selects the usleep() implementation; called from hpc_Init()
+{
+ NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtDelayExecution");
+ NtQueryTimerResolution = (NTSTATUS (__stdcall *)(PULONG, PULONG, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtQueryTimerResolution");
+ NtSetTimerResolution = (NTSTATUS (__stdcall *)(ULONG, BOOLEAN, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtSetTimerResolution");
+
+ usleep = (NtDelayExecution != NULL) ? usleepGood : usleepWeak; // use the NtDelayExecution()-based sleep when it was found, else the Sleep() fallback
+}
+#endif
+
+void hpc_Init(void) // one-time setup: selects the Windows usleep() implementation and caches the performance-counter frequency in hpcFreq
+{
+#ifdef _WIN32
+ windowsSetupUsleep();
+#endif
+ hpcFreq.freq64 = SDL_GetPerformanceFrequency(); // performance-counter ticks per second
+ hpcFreq.dFreq = (double)hpcFreq.freq64;
+ hpcFreq.dFreqMulMicro = (1000.0 * 1000.0) / hpcFreq.dFreq; // microseconds per tick (hpc_Wait() multiplies a tick count by this)
+}
+
+void hpc_SetDurationInHz(hpc_t *hpc, const double dHz) // stores the period of a dHz-rate timer in hpc, expressed in performance-counter ticks
+{
+ const double dDuration = hpcFreq.dFreq / dHz; // ticks per period; uses hpcFreq.dFreq, which hpc_Init() sets
+
+ // break down duration into integer and frac parts
+ double dDurationInt;
+ double dDurationFrac = modf(dDuration, &dDurationInt);
+
+ // set 64:53fp values (whole ticks in duration64Int, fraction scaled by FRAC_SCALE in duration64Frac)
+ hpc->duration64Int = (uint64_t)dDurationInt;
+ hpc->duration64Frac = (uint64_t)round(dDurationFrac * FRAC_SCALE);
+}
+
+void hpc_ResetEndTime(hpc_t *hpc) // restarts the timer: the next end time becomes "now" plus one duration
+{
+ hpc->endTime64Int = SDL_GetPerformanceCounter() + hpc->duration64Int;
+ hpc->endTime64Frac = hpc->duration64Frac; // fractional accumulator restarts at the duration's own fraction
+}
+
+void hpc_Wait(hpc_t *hpc) // sleeps until hpc's end time if it has not passed yet, then advances the end time by one duration
+{
+#ifdef _WIN32 // set resolution to 0.5ms (safest minimum) - this is confirmed to improve NtDelayExecution() and Sleep()
+ ULONG originalTimerResolution, minRes, maxRes, curRes; // NOTE(review): queried on every call, and nothing visible here restores the previous resolution - confirm intended
+
+ if (NtQueryTimerResolution != NULL && NtSetTimerResolution != NULL)
+ {
+ if (!NtQueryTimerResolution(&minRes, &maxRes, &originalTimerResolution)) // a zero return value is treated as success
+ {
+ if (originalTimerResolution != 5000 && maxRes <= 5000) // only when not already at 5000 and the reported maximum resolution value is <= 5000
+ NtSetTimerResolution(5000, TRUE, &curRes); // set to 0.5ms (safest minimum)
+ }
+ }
+#endif
+
+ const uint64_t currTime64 = SDL_GetPerformanceCounter();
+ if (currTime64 < hpc->endTime64Int)
+ {
+ uint64_t timeLeft64 = hpc->endTime64Int - currTime64; // remaining time in counter ticks
+
+ // convert to int32_t for fast SSE2 SIMD usage later on
+ if (timeLeft64 > INT32_MAX)
+ timeLeft64 = INT32_MAX;
+
+ const int32_t timeLeft32 = (int32_t)timeLeft64;
+
+ int32_t microSecsLeft = (int32_t)((timeLeft32 * hpcFreq.dFreqMulMicro) + 0.5); // rounded
+ if (microSecsLeft > 0)
+ usleep(microSecsLeft); // on Windows this is the function pointer chosen in windowsSetupUsleep()
+ }
+
+ // set next end time
+
+ hpc->endTime64Int += hpc->duration64Int;
+
+ hpc->endTime64Frac += hpc->duration64Frac;
+ if (hpc->endTime64Frac >= FRAC_SCALE) // fractional part reached 1.0: carry one whole tick into the integer part
+ {
+ hpc->endTime64Frac &= FRAC_MASK;
+ hpc->endTime64Int++;
+ }
+}
--- /dev/null
+++ b/src/ft2_hpc.h
@@ -1,0 +1,23 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+typedef struct // performance-counter frequency values, filled in by hpc_Init()
+{
+ uint64_t freq64; // SDL_GetPerformanceFrequency() result
+ double dFreq, dFreqMulMicro; // freq64 as a double; 1000000.0 / dFreq (multiply ticks by this to get microseconds)
+} hpcFreq_t;
+
+typedef struct // state of one periodic timer
+{
+ uint64_t duration64Int, duration64Frac; // period in performance-counter ticks: whole part, and fraction scaled by 2^53 (set by hpc_SetDurationInHz())
+ uint64_t endTime64Int, endTime64Frac; // next end time in the same format (set by hpc_ResetEndTime(), advanced by hpc_Wait())
+} hpc_t;
+
+extern hpcFreq_t hpcFreq; // defined in ft2_hpc.c
+
+void hpc_Init(void); // fills in hpcFreq (and, on Windows, sets up the usleep() implementation)
+void hpc_SetDurationInHz(hpc_t *hpc, double dHz); // sets hpc's period from a rate in Hz; reads hpcFreq, which hpc_Init() fills in
+void hpc_ResetEndTime(hpc_t *hpc); // sets the end time to the current counter value plus one period
+void hpc_Wait(hpc_t *hpc); // sleeps until the end time if it has not passed, then advances it by one period
--- a/src/ft2_main.c
+++ b/src/ft2_main.c
@@ -33,12 +33,12 @@
#include "ft2_events.h"
#include "ft2_bmp.h"
#include "ft2_structs.h"
+#include "ft2_hpc.h"
#ifdef HAS_MIDI
static SDL_Thread *initMidiThread;
#endif
-static void setupPerfFreq(void);
static void initializeVars(void);
static void cleanUpAndExit(void); // never call this inside the main loop
#ifdef __APPLE__
@@ -117,7 +117,6 @@
return 0;
}
- setupWin32Usleep();
disableWasapi(); // disable problematic WASAPI SDL2 audio driver on Windows (causes clicks/pops sometimes...)
// 13.03.2020: This is still needed with SDL 2.0.12...
#endif
@@ -144,6 +143,9 @@
*/
SDL_StopTextInput();
+ hpc_Init();
+ hpc_SetDurationInHz(&video.vblankHpc, VBLANK_HZ);
+
#ifdef __APPLE__
osxSetDirToProgramDirFromArgs(argv);
#endif
@@ -178,8 +180,6 @@
audio.currOutputDevice = getAudioOutputDeviceFromConfig();
audio.currInputDevice = getAudioInputDeviceFromConfig();
- setupPerfFreq();
-
if (!setupAudio(CONFIG_HIDE_ERRORS)) // can we open the audio device?
{
// nope, try with the default audio device
@@ -232,11 +232,11 @@
SDL_DetachThread(initMidiThread); // don't wait for this thread, let it clean up when done
#endif
- setupWaitVBL(); // this is needed for potential okBox() calls in handleModuleLoadFromArg()
+ hpc_ResetEndTime(&video.vblankHpc); // this is needed for potential okBox() calls in handleModuleLoadFromArg()
handleModuleLoadFromArg(argc, argv);
editor.mainLoopOngoing = true;
- setupWaitVBL(); // this must be the very last thing done before entering the main loop
+ hpc_ResetEndTime(&video.vblankHpc); // this must be the very last thing done before entering the main loop
while (editor.programRunning)
{
@@ -377,7 +377,6 @@
}
#ifdef _WIN32
- freeWin32Usleep();
closeSingleInstancing();
#endif
@@ -416,28 +415,6 @@
}
}
#endif
-
-static void setupPerfFreq(void)
-{
- double dInt;
-
- const uint64_t perfFreq64 = SDL_GetPerformanceFrequency();
- assert(perfFreq64 != 0);
-
- editor.dPerfFreq = (double)perfFreq64;
- editor.dPerfFreqMulMicro = 1000000.0 / editor.dPerfFreq;
- editor.dPerfFreqMulMs = 1.0 / (editor.dPerfFreq / 1000.0);
-
- // calculate vblank time for performance counters and split into int/frac
- double dFrac = modf(editor.dPerfFreq / VBLANK_HZ, &dInt);
-
- // integer part
- video.vblankTimeLen = (int32_t)dInt;
-
- // fractional part scaled to 0..2^32-1
- dFrac *= UINT32_MAX+1.0;
- video.vblankTimeLenFrac = (uint32_t)dFrac;
-}
#ifdef _WIN32
static void disableWasapi(void)
--- a/src/ft2_video.c
+++ b/src/ft2_video.c
@@ -48,7 +48,6 @@
static bool songIsModified;
static char wndTitle[256];
-static uint64_t timeNext64, timeNext64Frac;
static sprite_t sprites[SPRITE_NUM];
// for FPS counter
@@ -180,7 +179,8 @@
if (!video.vsync60HzPresent)
{
- waitVBL(); // we have no VSync, do crude thread sleeping to sync to ~60Hz
+ // we have no VSync, do crude thread sleeping to sync to ~60Hz
+ hpc_Wait(&video.vblankHpc);
}
else
{
@@ -190,14 +190,14 @@
#ifdef __APPLE__
// macOS: VSync gets disabled if the window is 100% covered by another window. Let's add a (crude) fix:
if (minimized || !(windowFlags & SDL_WINDOW_INPUT_FOCUS))
- waitVBL();
+ hpc_Wait(&video.vblankHpc);
#elif __unix__
// *NIX: VSync gets disabled in fullscreen mode (at least on some distros/systems). Let's add a fix:
if (minimized || video.fullscreen)
- waitVBL();
+ hpc_Wait(&video.vblankHpc);
#else
if (minimized)
- waitVBL();
+ hpc_Wait(&video.vblankHpc);
#endif
}
@@ -719,44 +719,6 @@
clr32 += srcPitch;
dst32 += dstPitch;
}
- }
-}
-
-void setupWaitVBL(void)
-{
- // set next frame time
- timeNext64 = SDL_GetPerformanceCounter() + video.vblankTimeLen;
- timeNext64Frac = video.vblankTimeLenFrac;
-}
-
-void waitVBL(void)
-{
- // this routine almost never delays if we have 60Hz vsync, but it's still needed in some occasions
-
- uint64_t time64 = SDL_GetPerformanceCounter();
- if (time64 < timeNext64)
- {
- time64 = timeNext64 - time64;
- if (time64 > INT32_MAX)
- time64 = INT32_MAX;
-
- const int32_t diff32 = (int32_t)time64;
-
- // convert and round to microseconds
- const int32_t time32 = (int32_t)((diff32 * editor.dPerfFreqMulMicro) + 0.5);
-
- // delay until we have reached the next frame
- if (time32 > 0)
- usleep(time32);
- }
-
- // update next frame time
- timeNext64 += video.vblankTimeLen;
- timeNext64Frac += video.vblankTimeLenFrac;
- if (timeNext64Frac > UINT32_MAX)
- {
- timeNext64Frac &= UINT32_MAX;
- timeNext64++;
}
}
--- a/src/ft2_video.h
+++ b/src/ft2_video.h
@@ -5,6 +5,7 @@
#include "ft2_header.h"
#include "ft2_palette.h"
#include "ft2_audio.h"
+#include "ft2_hpc.h"
enum
{
@@ -20,10 +21,11 @@
{
bool fullscreen, showFPSCounter, useDesktopMouseCoords;
uint32_t xScale, yScale;
- uint32_t *frameBuffer, palette[PAL_NUM], vblankTimeLen, vblankTimeLenFrac;
+ uint32_t *frameBuffer, palette[PAL_NUM];
#ifdef _WIN32
HWND hWnd;
#endif
+ hpc_t vblankHpc;
SDL_Window *window;
double dMonitorRefreshRate;
float fMouseXMul, fMouseYMul;
@@ -74,5 +76,3 @@
void setWindowSizeFromConfig(bool updateRenderer);
bool recreateTexture(void);
void toggleFullScreen(void);
-void setupWaitVBL(void);
-void waitVBL(void);
--- a/src/scopes/ft2_scopes.c
+++ b/src/scopes/ft2_scopes.c
@@ -20,12 +20,12 @@
#include "../ft2_video.h"
#include "../ft2_tables.h"
#include "../ft2_structs.h"
+#include "../ft2_hpc.h"
#include "ft2_scopes.h"
#include "ft2_scopedraw.h"
static volatile bool scopesUpdatingFlag, scopesDisplayingFlag;
-static uint32_t scopeTimeLen, scopeTimeLenFrac;
-static uint64_t timeNext64, timeNext64Frac;
+static hpc_t scopeHpc;
static volatile scope_t scope[MAX_CHANNELS];
static SDL_Thread *scopeThread;
@@ -520,9 +520,8 @@
// this is needed for scope stability (confirmed)
SDL_SetThreadPriority(SDL_THREAD_PRIORITY_HIGH);
- // set next frame time
- timeNext64 = SDL_GetPerformanceCounter() + scopeTimeLen;
- timeNext64Frac = scopeTimeLenFrac;
+ hpc_SetDurationInHz(&scopeHpc, SCOPE_HZ);
+ hpc_ResetEndTime(&scopeHpc);
while (editor.programRunning)
{
@@ -530,31 +529,7 @@
updateScopes();
editor.scopeThreadBusy = false;
- uint64_t time64 = SDL_GetPerformanceCounter();
- if (time64 < timeNext64)
- {
- time64 = timeNext64 - time64;
- if (time64 > INT32_MAX)
- time64 = INT32_MAX;
-
- const int32_t diff32 = (int32_t)time64;
-
- // convert and round to microseconds
- const int32_t time32 = (int32_t)((diff32 * editor.dPerfFreqMulMicro) + 0.5);
-
- // delay until we have reached the next frame
- if (time32 > 0)
- usleep(time32);
- }
-
- // update next tick time
- timeNext64 += scopeTimeLen;
- timeNext64Frac += scopeTimeLenFrac;
- if (timeNext64Frac > UINT32_MAX)
- {
- timeNext64Frac &= UINT32_MAX;
- timeNext64++;
- }
+ hpc_Wait(&scopeHpc);
}
(void)ptr;
@@ -563,18 +538,6 @@
bool initScopes(void)
{
- double dInt;
-
- // calculate scope time for performance counters and split into int/frac
- double dFrac = modf(editor.dPerfFreq / SCOPE_HZ, &dInt);
-
- // integer part
- scopeTimeLen = (int32_t)dInt;
-
- // fractional part (scaled to 0..2^32-1)
- dFrac *= UINT32_MAX+1.0;
- scopeTimeLenFrac = (uint32_t)dFrac;
-
scopeThread = SDL_CreateThread(scopeThreadFunc, NULL, NULL);
if (scopeThread == NULL)
{
--- a/vs2019_project/ft2-clone/ft2-clone.vcxproj
+++ b/vs2019_project/ft2-clone/ft2-clone.vcxproj
@@ -299,6 +299,7 @@
<ClCompile Include="..\..\src\ft2_events.c" />
<ClCompile Include="..\..\src\ft2_gui.c" />
<ClCompile Include="..\..\src\ft2_help.c" />
+ <ClCompile Include="..\..\src\ft2_hpc.c" />
<ClCompile Include="..\..\src\ft2_inst_ed.c" />
<ClCompile Include="..\..\src\ft2_keyboard.c" />
<ClCompile Include="..\..\src\ft2_main.c" />
@@ -394,6 +395,7 @@
<ClInclude Include="..\..\src\ft2_gui.h" />
<ClInclude Include="..\..\src\ft2_header.h" />
<ClInclude Include="..\..\src\ft2_help.h" />
+ <ClInclude Include="..\..\src\ft2_hpc.h" />
<ClInclude Include="..\..\src\ft2_inst_ed.h" />
<ClInclude Include="..\..\src\ft2_keyboard.h" />
<ClInclude Include="..\..\src\ft2_midi.h" />
--- a/vs2019_project/ft2-clone/ft2-clone.vcxproj.filters
+++ b/vs2019_project/ft2-clone/ft2-clone.vcxproj.filters
@@ -163,6 +163,7 @@
<ClCompile Include="..\..\src\scopes\ft2_scopes.c">
<Filter>scopes</Filter>
</ClCompile>
+ <ClCompile Include="..\..\src\ft2_hpc.c" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\src\rtmidi\RtMidi.h">
@@ -319,6 +320,9 @@
<Filter>scopes</Filter>
</ClInclude>
<ClInclude Include="..\..\src\ft2_cpu.h">
+ <Filter>headers</Filter>
+ </ClInclude>
+ <ClInclude Include="..\..\src\ft2_hpc.h">
<Filter>headers</Filter>
</ClInclude>
</ItemGroup>