From ccab94c765c5518e9ed8aa3a04b10c2d6a2d9159 Mon Sep 17 00:00:00 2001 From: Emill Date: Sun, 29 May 2022 18:16:23 +0200 Subject: [PATCH] Implement AnyFPS + option to queue one rendered frame (#401) --- .../Lib/Fast3D/gfx_direct3d11.cpp | 37 +++-- .../libultraship/Lib/Fast3D/gfx_dxgi.cpp | 145 ++++++++++++------ .../libultraship/Lib/Fast3D/gfx_dxgi.h | 8 +- .../libultraship/Lib/Fast3D/gfx_pc.cpp | 15 +- libultraship/libultraship/Lib/Fast3D/gfx_pc.h | 4 +- .../libultraship/Lib/Fast3D/gfx_sdl2.cpp | 39 +++-- .../Lib/Fast3D/gfx_window_manager_api.h | 4 +- libultraship/libultraship/SohImGuiImpl.cpp | 69 ++++++++- libultraship/libultraship/Window.cpp | 11 +- libultraship/libultraship/Window.h | 3 +- soh/soh/OTRGlobals.cpp | 40 ++++- soh/soh/frame_interpolation.cpp | 2 +- soh/src/code/z_actor.c | 2 +- .../ovl_file_choose/z_file_choose.c | 8 + 14 files changed, 298 insertions(+), 89 deletions(-) diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp index 4664a0faa..f1661f426 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp @@ -96,7 +96,6 @@ static struct { uint32_t msaa_num_quality_levels[D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT]; ComPtr device; - ComPtr swap_chain; ComPtr context; ComPtr rasterizer_state; ComPtr depth_stencil_state; @@ -252,7 +251,24 @@ static void gfx_d3d11_init(void) { }); // Create the swap chain - d3d.swap_chain = gfx_dxgi_create_swap_chain(d3d.device.Get()); + gfx_dxgi_create_swap_chain(d3d.device.Get(), []() { + d3d.framebuffers[0].render_target_view.Reset(); + d3d.textures[d3d.framebuffers[0].texture_id].texture.Reset(); + d3d.context->ClearState(); + d3d.context->Flush(); + + d3d.last_shader_program = nullptr; + d3d.last_vertex_buffer_stride = 0; + d3d.last_blend_state.Reset(); + d3d.last_resource_views[0].Reset(); + d3d.last_resource_views[1].Reset(); + d3d.last_sampler_states[0].Reset(); + 
d3d.last_sampler_states[1].Reset(); + d3d.last_depth_test = -1; + d3d.last_depth_mask = -1; + d3d.last_zmode_decal = -1; + d3d.last_primitive_topology = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + }); // Create D3D Debug device if in debug mode @@ -266,7 +282,7 @@ static void gfx_d3d11_init(void) { // Check the size of the window DXGI_SWAP_CHAIN_DESC1 swap_chain_desc; - ThrowIfFailed(d3d.swap_chain->GetDesc1(&swap_chain_desc)); + ThrowIfFailed(gfx_dxgi_get_swap_chain()->GetDesc1(&swap_chain_desc)); d3d.textures[fb.texture_id].width = swap_chain_desc.Width; d3d.textures[fb.texture_id].height = swap_chain_desc.Height; fb.msaa_level = 1; @@ -303,8 +319,6 @@ static void gfx_d3d11_init(void) { ThrowIfFailed(d3d.device->CreateBuffer(&constant_buffer_desc, nullptr, d3d.per_frame_cb.GetAddressOf()), gfx_dxgi_get_h_wnd(), "Failed to create per-frame constant buffer."); - d3d.context->PSSetConstantBuffers(0, 1, d3d.per_frame_cb.GetAddressOf()); - // Create per-draw constant buffer constant_buffer_desc.Usage = D3D11_USAGE_DYNAMIC; @@ -316,8 +330,6 @@ static void gfx_d3d11_init(void) { ThrowIfFailed(d3d.device->CreateBuffer(&constant_buffer_desc, nullptr, d3d.per_draw_cb.GetAddressOf()), gfx_dxgi_get_h_wnd(), "Failed to create per-draw constant buffer."); - d3d.context->PSSetConstantBuffers(1, 1, d3d.per_draw_cb.GetAddressOf()); - // Create compute shader that can be used to retrieve depth buffer values const char* shader_source = R"( @@ -737,6 +749,8 @@ static void gfx_d3d11_on_resize(void) { static void gfx_d3d11_start_frame(void) { // Set per-frame constant buffer + ID3D11Buffer* buffers[2] = { d3d.per_frame_cb.Get(), d3d.per_draw_cb.Get() }; + d3d.context->PSSetConstantBuffers(0, 2, buffers); d3d.per_frame_cb_data.noise_frame++; if (d3d.per_frame_cb_data.noise_frame > 150) { @@ -803,15 +817,16 @@ static void gfx_d3d11_update_framebuffer_parameters(int fb_id, uint32_t width, u if (msaa_level <= 1) { ThrowIfFailed(d3d.device->CreateShaderResourceView(tex.texture.Get(), nullptr, 
tex.resource_view.ReleaseAndGetAddressOf())); } - } else if (diff) { + } else if (diff || (render_target && tex.texture.Get() == nullptr)) { DXGI_SWAP_CHAIN_DESC1 desc1; - ThrowIfFailed(d3d.swap_chain->GetDesc1(&desc1)); + IDXGISwapChain1* swap_chain = gfx_dxgi_get_swap_chain(); + ThrowIfFailed(swap_chain->GetDesc1(&desc1)); if (desc1.Width != width || desc1.Height != height) { fb.render_target_view.Reset(); tex.texture.Reset(); - ThrowIfFailed(d3d.swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, desc1.Flags)); + ThrowIfFailed(swap_chain->ResizeBuffers(0, 0, 0, DXGI_FORMAT_UNKNOWN, desc1.Flags)); } - ThrowIfFailed(d3d.swap_chain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID *)tex.texture.ReleaseAndGetAddressOf())); + ThrowIfFailed(swap_chain->GetBuffer(0, __uuidof(ID3D11Texture2D), (LPVOID *)tex.texture.ReleaseAndGetAddressOf())); } if (render_target) { ThrowIfFailed(d3d.device->CreateRenderTargetView(tex.texture.Get(), nullptr, fb.render_target_view.ReleaseAndGetAddressOf())); diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.cpp index 49abe0b28..d257d7b08 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.cpp @@ -34,15 +34,8 @@ #define WINCLASS_NAME L"N64GAME" #define GFX_API_NAME "DirectX" -#ifdef VERSION_EU -#define FRAME_INTERVAL_US_NUMERATOR_ 60000 -#define FRAME_INTERVAL_US_DENOMINATOR 3 -#else -#define FRAME_INTERVAL_US_NUMERATOR_ 50000 -#define FRAME_INTERVAL_US_DENOMINATOR 3 -#endif - -#define FRAME_INTERVAL_US_NUMERATOR (FRAME_INTERVAL_US_NUMERATOR_ * dxgi.frame_divisor) +#define FRAME_INTERVAL_NS_NUMERATOR 1000000000 +#define FRAME_INTERVAL_NS_DENOMINATOR (dxgi.target_fps) using namespace Microsoft::WRL; // For ComPtr @@ -66,14 +59,19 @@ static struct { ComPtr factory; ComPtr swap_chain; HANDLE waitable_object; + ComPtr swap_chain_device; // D3D11 Device or D3D12 Command Queue + std::function 
before_destroy_swap_chain_fn; uint64_t qpc_init, qpc_freq; - uint64_t frame_timestamp; // in units of 1/FRAME_INTERVAL_US_DENOMINATOR microseconds + uint64_t frame_timestamp; // in units of 1/FRAME_INTERVAL_NS_DENOMINATOR nanoseconds std::map frame_stats; std::set> pending_frame_stats; bool dropped_frame; bool zero_latency; + float detected_hz; UINT length_in_vsync_frames; - uint32_t frame_divisor; + uint32_t target_fps; + uint32_t maximum_frame_latency; + uint32_t applied_maximum_frame_latency; HANDLE timer; bool use_timer; LARGE_INTEGER previous_present_time; @@ -143,6 +141,22 @@ static void run_as_dpi_aware(Fun f) { } } +static void apply_maximum_frame_latency(bool first) { + ComPtr swap_chain2; + if (dxgi.swap_chain->QueryInterface(__uuidof(IDXGISwapChain2), &swap_chain2) == S_OK) { + ThrowIfFailed(swap_chain2->SetMaximumFrameLatency(dxgi.maximum_frame_latency)); + if (first) { + dxgi.waitable_object = swap_chain2->GetFrameLatencyWaitableObject(); + WaitForSingleObject(dxgi.waitable_object, INFINITE); + } + } else { + ComPtr device1; + ThrowIfFailed(dxgi.swap_chain->GetDevice(__uuidof(IDXGIDevice1), &device1)); + ThrowIfFailed(device1->SetMaximumFrameLatency(dxgi.maximum_frame_latency)); + } + dxgi.applied_maximum_frame_latency = dxgi.maximum_frame_latency; +} + static void toggle_borderless_window_full_screen(bool enable, bool call_callback) { // Windows 7 + flip mode + waitable object can't go to exclusive fullscreen, // so do borderless instead. 
If DWM is enabled, this means we get one monitor @@ -271,7 +285,8 @@ void gfx_dxgi_init(const char *game_name, bool start_in_fullscreen) { dxgi.qpc_init = qpc_init.QuadPart; dxgi.qpc_freq = qpc_freq.QuadPart; - dxgi.frame_divisor = 1; + dxgi.target_fps = 60; + dxgi.maximum_frame_latency = 1; dxgi.timer = CreateWaitableTimer(nullptr, false, nullptr); // Prepare window title @@ -367,8 +382,8 @@ static void gfx_dxgi_handle_events(void) { }*/ } -static uint64_t qpc_to_us(uint64_t qpc) { - return qpc / dxgi.qpc_freq * 1000000 + qpc % dxgi.qpc_freq * 1000000 / dxgi.qpc_freq; +static uint64_t qpc_to_ns(uint64_t qpc) { + return qpc / dxgi.qpc_freq * 1000000000 + qpc % dxgi.qpc_freq * 1000000000 / dxgi.qpc_freq; } static uint64_t qpc_to_100ns(uint64_t qpc) { @@ -406,7 +421,7 @@ static bool gfx_dxgi_start_frame(void) { dxgi.use_timer = false; - dxgi.frame_timestamp += FRAME_INTERVAL_US_NUMERATOR; + dxgi.frame_timestamp += FRAME_INTERVAL_NS_NUMERATOR; if (dxgi.frame_stats.size() >= 2) { DXGI_FRAME_STATISTICS *first = &dxgi.frame_stats.begin()->second; @@ -421,14 +436,16 @@ static bool gfx_dxgi_start_frame(void) { } double estimated_vsync_interval = (double)sync_qpc_diff / (double)sync_vsync_diff; - uint64_t estimated_vsync_interval_us = qpc_to_us(estimated_vsync_interval); - //printf("Estimated vsync_interval: %d\n", (int)estimated_vsync_interval_us); - if (estimated_vsync_interval_us < 2 || estimated_vsync_interval_us > 1000000) { + uint64_t estimated_vsync_interval_ns = qpc_to_ns(estimated_vsync_interval); + //printf("Estimated vsync_interval: %d\n", (int)estimated_vsync_interval_ns); + if (estimated_vsync_interval_ns < 2000 || estimated_vsync_interval_ns > 1000000000) { // Unreasonable, maybe a monitor change - estimated_vsync_interval_us = 16666; - estimated_vsync_interval = estimated_vsync_interval_us * dxgi.qpc_freq / 1000000; + estimated_vsync_interval_ns = 16666666; + estimated_vsync_interval = estimated_vsync_interval_ns * dxgi.qpc_freq / 1000000000; } + 
dxgi.detected_hz = (float)((double)1000000000 / (double)estimated_vsync_interval_ns); + UINT queued_vsyncs = 0; bool is_first = true; for (const std::pair& p : dxgi.pending_frame_stats) { @@ -440,21 +457,21 @@ static bool gfx_dxgi_start_frame(void) { } uint64_t last_frame_present_end_qpc = (last->SyncQPCTime.QuadPart - dxgi.qpc_init) + estimated_vsync_interval * queued_vsyncs; - uint64_t last_end_us = qpc_to_us(last_frame_present_end_qpc); + uint64_t last_end_ns = qpc_to_ns(last_frame_present_end_qpc); - double vsyncs_to_wait = (double)(int64_t)(dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR - last_end_us) / estimated_vsync_interval_us; - //printf("ts: %llu, last_end_us: %llu, Init v: %f\n", dxgi.frame_timestamp / 3, last_end_us, vsyncs_to_wait); + double vsyncs_to_wait = (double)(int64_t)(dxgi.frame_timestamp / FRAME_INTERVAL_NS_DENOMINATOR - last_end_ns) / estimated_vsync_interval_ns; + //printf("ts: %llu, last_end_ns: %llu, Init v: %f\n", dxgi.frame_timestamp / 3, last_end_ns, vsyncs_to_wait); if (vsyncs_to_wait <= 0) { // Too late - if ((int64_t)(dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR - last_end_us) < -66666) { + if ((int64_t)(dxgi.frame_timestamp / FRAME_INTERVAL_NS_DENOMINATOR - last_end_ns) < -66666666) { // The application must have been paused or similar - vsyncs_to_wait = round(((double)FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR) / estimated_vsync_interval_us); + vsyncs_to_wait = round(((double)FRAME_INTERVAL_NS_NUMERATOR / FRAME_INTERVAL_NS_DENOMINATOR) / estimated_vsync_interval_ns); if (vsyncs_to_wait < 1) { vsyncs_to_wait = 1; } - dxgi.frame_timestamp = FRAME_INTERVAL_US_DENOMINATOR * (last_end_us + vsyncs_to_wait * estimated_vsync_interval_us); + dxgi.frame_timestamp = FRAME_INTERVAL_NS_DENOMINATOR * (last_end_ns + vsyncs_to_wait * estimated_vsync_interval_ns); } else { // Drop frame //printf("Dropping frame\n"); @@ -464,9 +481,9 @@ static bool gfx_dxgi_start_frame(void) { } double orig_wait = vsyncs_to_wait; if 
(floor(vsyncs_to_wait) != vsyncs_to_wait) { - uint64_t left = last_end_us + floor(vsyncs_to_wait) * estimated_vsync_interval_us; - uint64_t right = last_end_us + ceil(vsyncs_to_wait) * estimated_vsync_interval_us; - uint64_t adjusted_desired_time = dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR + (last_end_us + (FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR) > dxgi.frame_timestamp / FRAME_INTERVAL_US_DENOMINATOR ? 2000 : -2000); + uint64_t left = last_end_ns + floor(vsyncs_to_wait) * estimated_vsync_interval_ns; + uint64_t right = last_end_ns + ceil(vsyncs_to_wait) * estimated_vsync_interval_ns; + uint64_t adjusted_desired_time = dxgi.frame_timestamp / FRAME_INTERVAL_NS_DENOMINATOR + (last_end_ns + (FRAME_INTERVAL_NS_NUMERATOR / FRAME_INTERVAL_NS_DENOMINATOR) > dxgi.frame_timestamp / FRAME_INTERVAL_NS_DENOMINATOR ? 2000000 : -2000000); int64_t diff_left = adjusted_desired_time - left; int64_t diff_right = right - adjusted_desired_time; if (diff_left < 0) { @@ -506,7 +523,7 @@ static void gfx_dxgi_swap_buffers_begin(void) { LARGE_INTEGER t; if (dxgi.use_timer) { QueryPerformanceCounter(&t); - int64_t next = qpc_to_100ns(dxgi.previous_present_time.QuadPart) + 10 * FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR; + int64_t next = qpc_to_100ns(dxgi.previous_present_time.QuadPart) + FRAME_INTERVAL_NS_NUMERATOR / (FRAME_INTERVAL_NS_DENOMINATOR * 100); int64_t left = next - qpc_to_100ns(t.QuadPart); if (left > 0) { LARGE_INTEGER li; @@ -531,6 +548,32 @@ static void gfx_dxgi_swap_buffers_end(void) { QueryPerformanceCounter(&t0); QueryPerformanceCounter(&t1); + if (dxgi.applied_maximum_frame_latency > dxgi.maximum_frame_latency) { + // There seems to be a bug that if latency is decreased, there is no effect of that operation, so recreate swap chain + if (dxgi.waitable_object != nullptr) { + if (!dxgi.dropped_frame) { + // Wait the last time on this swap chain + WaitForSingleObject(dxgi.waitable_object, INFINITE); + } + 
CloseHandle(dxgi.waitable_object); + dxgi.waitable_object = nullptr; + } + + dxgi.before_destroy_swap_chain_fn(); + + dxgi.swap_chain.Reset(); + + gfx_dxgi_create_swap_chain(dxgi.swap_chain_device.Get(), move(dxgi.before_destroy_swap_chain_fn)); + + dxgi.frame_timestamp = 0; + dxgi.frame_stats.clear(); + dxgi.pending_frame_stats.clear(); + + return; // Make sure we don't wait a second time on the waitable object, since that would hang the program + } else if (dxgi.applied_maximum_frame_latency != dxgi.maximum_frame_latency) { + apply_maximum_frame_latency(false); + } + if (!dxgi.dropped_frame) { if (dxgi.waitable_object != nullptr) { WaitForSingleObject(dxgi.waitable_object, INFINITE); @@ -554,8 +597,20 @@ static double gfx_dxgi_get_time(void) { return (double)(t.QuadPart - dxgi.qpc_init) / dxgi.qpc_freq; } -static void gfx_dxgi_set_frame_divisor(int divisor) { - dxgi.frame_divisor = divisor; +static void gfx_dxgi_set_target_fps(int fps) { + uint32_t old_fps = dxgi.target_fps; + uint64_t t0 = dxgi.frame_timestamp / old_fps; + uint32_t t1 = dxgi.frame_timestamp % old_fps; + dxgi.target_fps = fps; + dxgi.frame_timestamp = t0 * dxgi.target_fps + t1 * dxgi.target_fps / old_fps; +} + +static void gfx_dxgi_set_maximum_frame_latency(int latency) { + dxgi.maximum_frame_latency = latency; +} + +static float gfx_dxgi_get_detected_hz() { + return dxgi.detected_hz; } void gfx_dxgi_create_factory_and_device(bool debug, int d3d_version, bool (*create_device_fn)(IDXGIAdapter1 *adapter, bool test_only)) { @@ -592,12 +647,12 @@ void gfx_dxgi_create_factory_and_device(bool debug, int d3d_version, bool (*crea SetWindowTextW(dxgi.h_wnd, w_title); } -ComPtr gfx_dxgi_create_swap_chain(IUnknown *device) { +void gfx_dxgi_create_swap_chain(IUnknown *device, std::function&& before_destroy_fn) { bool win8 = IsWindows8OrGreater(); // DXGI_SCALING_NONE is only supported on Win8 and beyond bool dxgi_13 = dxgi.CreateDXGIFactory2 != nullptr; // DXGI 1.3 introduced waitable object 
DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {}; - swap_chain_desc.BufferCount = 2; + swap_chain_desc.BufferCount = 3; swap_chain_desc.Width = 0; swap_chain_desc.Height = 0; swap_chain_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; @@ -617,28 +672,24 @@ ComPtr gfx_dxgi_create_swap_chain(IUnknown *device) { }); ThrowIfFailed(dxgi.factory->MakeWindowAssociation(dxgi.h_wnd, DXGI_MWA_NO_ALT_ENTER)); - ComPtr swap_chain2; - if (dxgi.swap_chain->QueryInterface(__uuidof(IDXGISwapChain2), &swap_chain2) == S_OK) { - ThrowIfFailed(swap_chain2->SetMaximumFrameLatency(1)); - dxgi.waitable_object = swap_chain2->GetFrameLatencyWaitableObject(); - WaitForSingleObject(dxgi.waitable_object, INFINITE); - } else { - ComPtr device1; - ThrowIfFailed(device->QueryInterface(IID_PPV_ARGS(&device1))); - ThrowIfFailed(device1->SetMaximumFrameLatency(1)); - } + apply_maximum_frame_latency(true); ThrowIfFailed(dxgi.swap_chain->GetDesc1(&swap_chain_desc)); dxgi.current_width = swap_chain_desc.Width; dxgi.current_height = swap_chain_desc.Height; - return dxgi.swap_chain; + dxgi.swap_chain_device = device; + dxgi.before_destroy_swap_chain_fn = std::move(before_destroy_fn); } HWND gfx_dxgi_get_h_wnd(void) { return dxgi.h_wnd; } +IDXGISwapChain1* gfx_dxgi_get_swap_chain() { + return dxgi.swap_chain.Get(); +} + void ThrowIfFailed(HRESULT res) { if (FAILED(res)) { fprintf(stderr, "Error: 0x%08X\n", res); @@ -668,7 +719,9 @@ extern "C" struct GfxWindowManagerAPI gfx_dxgi_api = { gfx_dxgi_swap_buffers_begin, gfx_dxgi_swap_buffers_end, gfx_dxgi_get_time, - gfx_dxgi_set_frame_divisor, + gfx_dxgi_set_target_fps, + gfx_dxgi_set_maximum_frame_latency, + gfx_dxgi_get_detected_hz, }; #endif diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.h b/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.h index d590daa66..134ebed64 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.h +++ b/libultraship/libultraship/Lib/Fast3D/gfx_dxgi.h @@ -4,9 +4,15 @@ #include "gfx_rendering_api.h" #ifdef 
DECLARE_GFX_DXGI_FUNCTIONS + +#include + +#include + void gfx_dxgi_create_factory_and_device(bool debug, int d3d_version, bool (*create_device_fn)(IDXGIAdapter1 *adapter, bool test_only)); -Microsoft::WRL::ComPtr gfx_dxgi_create_swap_chain(IUnknown *device); +void gfx_dxgi_create_swap_chain(IUnknown *device, std::function&& before_destroy_fn); HWND gfx_dxgi_get_h_wnd(void); +IDXGISwapChain1* gfx_dxgi_get_swap_chain(); void ThrowIfFailed(HRESULT res); void ThrowIfFailed(HRESULT res, HWND h_wnd, const char *message); #endif diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp index 98bab7f78..452b511d8 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp @@ -2789,6 +2789,9 @@ void gfx_run(Gfx *commands, const std::unordered_map& mtx_replaceme gfx_rapi->start_frame(); gfx_rapi->start_draw_to_framebuffer(game_renders_to_framebuffer ? game_framebuffer : 0, (float)gfx_current_dimensions.height / SCREEN_HEIGHT); gfx_rapi->clear_framebuffer(); + rdp.viewport_or_scissor_changed = true; + rendering_state.viewport = {}; + rendering_state.scissor = {}; gfx_run_dl(commands); gfx_flush(); SohUtils::saveEnvironmentVar("framebuffer", string()); @@ -2825,8 +2828,16 @@ void gfx_end_frame(void) { } } -void gfx_set_framedivisor(int divisor) { - gfx_wapi->set_frame_divisor(divisor); +void gfx_set_target_fps(int fps) { + gfx_wapi->set_target_fps(fps); +} + +void gfx_set_maximum_frame_latency(int latency) { + gfx_wapi->set_maximum_frame_latency(latency); +} + +float gfx_get_detected_hz(void) { + return gfx_wapi->get_detected_hz(); } int gfx_create_framebuffer(uint32_t width, uint32_t height) { diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_pc.h b/libultraship/libultraship/Lib/Fast3D/gfx_pc.h index 2ecb68898..65a6b57ba 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_pc.h +++ b/libultraship/libultraship/Lib/Fast3D/gfx_pc.h @@ -67,7 +67,9 @@ struct 
GfxRenderingAPI* gfx_get_current_rendering_api(void); void gfx_start_frame(void); void gfx_run(Gfx* commands, const std::unordered_map& mtx_replacements); void gfx_end_frame(void); -void gfx_set_framedivisor(int); +void gfx_set_target_fps(int); +void gfx_set_maximum_frame_latency(int latency); +float gfx_get_detected_hz(void); void gfx_texture_cache_clear(); extern "C" int gfx_create_framebuffer(uint32_t width, uint32_t height); void gfx_get_pixel_depth_prepare(float x, float y); diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_sdl2.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_sdl2.cpp index e80097c81..94c3ef5ca 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_sdl2.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_sdl2.cpp @@ -123,11 +123,10 @@ static uint64_t previous_time; static HANDLE timer; #endif -static int frameDivisor = 1; +static int target_fps = 60; -#define FRAME_INTERVAL_US_NUMERATOR_ 50000 -#define FRAME_INTERVAL_US_DENOMINATOR 3 -#define FRAME_INTERVAL_US_NUMERATOR (FRAME_INTERVAL_US_NUMERATOR_ * frameDivisor) +#define FRAME_INTERVAL_US_NUMERATOR 1000000 +#define FRAME_INTERVAL_US_DENOMINATOR (target_fps) static void gfx_sdl_init(const char *game_name, bool start_in_fullscreen) { SDL_Init(SDL_INIT_VIDEO); @@ -266,15 +265,16 @@ static uint64_t qpc_to_100ns(uint64_t qpc) { static inline void sync_framerate_with_timer(void) { uint64_t t; - t = SDL_GetPerformanceCounter(); + t = qpc_to_100ns(SDL_GetPerformanceCounter()); - const int64_t next = qpc_to_100ns(previous_time) + 10 * FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR; - const int64_t left = next - qpc_to_100ns(t); + const int64_t next = previous_time + 10 * FRAME_INTERVAL_US_NUMERATOR / FRAME_INTERVAL_US_DENOMINATOR; + const int64_t left = next - t; if (left > 0) { #ifdef __linux__ const timespec spec = { 0, left * 100 }; nanosleep(&spec, nullptr); #else + // The accuracy of this timer seems to usually be within +- 1.0 ms LARGE_INTEGER li; li.QuadPart = -left; 
SetWaitableTimer(timer, &li, 0, nullptr, nullptr, false); @@ -282,7 +282,13 @@ static inline void sync_framerate_with_timer(void) { #endif } - t = SDL_GetPerformanceCounter(); + t = qpc_to_100ns(SDL_GetPerformanceCounter()); + if (left > 0 && t - next < 10000) { + // In case it takes some time for the application to wake up after sleep, + // or inaccurate timer, + // don't let that slow down the framerate. + t = next; + } previous_time = t; } @@ -299,9 +305,16 @@ static double gfx_sdl_get_time(void) { return 0.0; } -static void gfx_sdl_set_framedivisor(int divisor) -{ - frameDivisor = divisor; +static void gfx_sdl_set_target_fps(int fps) { + target_fps = fps; +} + +static void gfx_sdl_set_maximum_frame_latency(int latency) { + // Not supported by SDL :( +} + +static float gfx_sdl_get_detected_hz(void) { + return 0; } struct GfxWindowManagerAPI gfx_sdl = { @@ -317,7 +330,9 @@ struct GfxWindowManagerAPI gfx_sdl = { gfx_sdl_swap_buffers_begin, gfx_sdl_swap_buffers_end, gfx_sdl_get_time, - gfx_sdl_set_framedivisor + gfx_sdl_set_target_fps, + gfx_sdl_set_maximum_frame_latency, + gfx_sdl_get_detected_hz }; #endif diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_window_manager_api.h b/libultraship/libultraship/Lib/Fast3D/gfx_window_manager_api.h index 5d7442390..bd45ccdf4 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_window_manager_api.h +++ b/libultraship/libultraship/Lib/Fast3D/gfx_window_manager_api.h @@ -17,7 +17,9 @@ struct GfxWindowManagerAPI { void (*swap_buffers_begin)(void); void (*swap_buffers_end)(void); double (*get_time)(void); // For debug - void (*set_frame_divisor)(int); + void (*set_target_fps)(int fps); + void (*set_maximum_frame_latency)(int latency); + float (*get_detected_hz)(void); }; #endif diff --git a/libultraship/libultraship/SohImGuiImpl.cpp b/libultraship/libultraship/SohImGuiImpl.cpp index 2a897437f..35643838f 100644 --- a/libultraship/libultraship/SohImGuiImpl.cpp +++ b/libultraship/libultraship/SohImGuiImpl.cpp @@ -680,6 +680,34 
@@ namespace SohImGui { Tooltip("Activates anti-aliasing when above 1, up to 8x for 8 samples for every pixel"); gfx_msaa_level = CVar_GetS32("gMSAAValue", 1); + if (impl.backend == Backend::DX11) + { + const char* cvar = "gExtraLatencyThreshold"; + int val = CVar_GetS32(cvar, 80); + val = MAX(MIN(val, 250), 0); + int fps = val; + + if (fps == 0) + { + ImGui::Text("Jitter fix: Off"); + } + else + { + ImGui::Text("Jitter fix: >= %d FPS", fps); + } + + if (ImGui::SliderInt("##ExtraLatencyThreshold", &val, 0, 250, "", ImGuiSliderFlags_AlwaysClamp)) + { + CVar_SetS32(cvar, val); + needs_save = true; + } + + Tooltip("When Interpolation FPS setting is at least this threshold,\n" + "add one frame of input lag (e.g. 16.6 ms for 60 FPS) in order to avoid jitter.\n" + "This setting allows the CPU to work on one frame while GPU works on the previous frame.\n" + "This setting should be used when your computer is too slow to do CPU + GPU work in time."); + } + EXPERIMENTAL(); ImGui::Text("Texture Filter (Needs reload)"); GfxRenderingAPI* gapi = gfx_get_current_rendering_api(); @@ -764,7 +792,46 @@ namespace SohImGui { EXPERIMENTAL(); - EnhancementCheckbox("60FPS Interpolation", "g60FPS"); + const char* fps_cvar = "gInterpolationFPS"; + { + int val = CVar_GetS32(fps_cvar, 20); + val = MAX(MIN(val, 250), 20); + int fps = val; + + if (fps == 20) + { + ImGui::Text("Frame interpolation: Off"); + } + else + { + ImGui::Text("Frame interpolation: %d FPS", fps); + } + + if (ImGui::SliderInt("##FPSInterpolation", &val, 20, 250, "", ImGuiSliderFlags_AlwaysClamp)) + { + CVar_SetS32(fps_cvar, val); + needs_save = true; + } + + Tooltip("Interpolate extra frames to get smoother graphics.\n" + "Set to match your monitor's refresh rate, or a divisor of it.\n" + "A higher target FPS than your monitor's refresh rate will just waste resources,\n" + "and might give a worse result.\n" + "For consistent input lag, set this value and your monitor's refresh rate to a multiple of 20.\n" + "Ctrl+Click 
for keyboard input."); + } + if (impl.backend == Backend::DX11) + { + if (ImGui::Button("Match Refresh Rate")) + { + int hz = roundf(gfx_get_detected_hz()); + if (hz >= 20 && hz <= 250) + { + CVar_SetS32(fps_cvar, hz); + needs_save = true; + } + } + } EnhancementCheckbox("Disable LOD", "gDisableLOD"); Tooltip("Turns off the level of detail setting, making models always use their higher poly variants"); diff --git a/libultraship/libultraship/Window.cpp b/libultraship/libultraship/Window.cpp index 5c2cf3cae..8319332ad 100644 --- a/libultraship/libultraship/Window.cpp +++ b/libultraship/libultraship/Window.cpp @@ -272,13 +272,14 @@ namespace Ship { gfx_run(Commands, m); gfx_end_frame(); } - gfx_run(Commands, {}); - gfx_end_frame(); } - void Window::SetFrameDivisor(int divisor) { - gfx_set_framedivisor(divisor); - //gfx_set_framedivisor(0); + void Window::SetTargetFps(int fps) { + gfx_set_target_fps(fps); + } + + void Window::SetMaximumFrameLatency(int latency) { + gfx_set_maximum_frame_latency(latency); } void Window::GetPixelDepthPrepare(float x, float y) { diff --git a/libultraship/libultraship/Window.h b/libultraship/libultraship/Window.h index 04886c53e..a3087bf0c 100644 --- a/libultraship/libultraship/Window.h +++ b/libultraship/libultraship/Window.h @@ -20,7 +20,8 @@ namespace Ship { void Init(); void StartFrame(); void RunCommands(Gfx* Commands, const std::vector>& mtx_replacements); - void SetFrameDivisor(int divisor); + void SetTargetFps(int fps); + void SetMaximumFrameLatency(int latency); void GetPixelDepthPrepare(float x, float y); uint16_t GetPixelDepth(float x, float y); void ToggleFullscreen(); diff --git a/soh/soh/OTRGlobals.cpp b/soh/soh/OTRGlobals.cpp index e8e1b31c1..9401d514a 100644 --- a/soh/soh/OTRGlobals.cpp +++ b/soh/soh/OTRGlobals.cpp @@ -175,8 +175,6 @@ extern "C" void Graph_StartFrame() { // C->C++ Bridge extern "C" void Graph_ProcessGfxCommands(Gfx* commands) { - 
OTRGlobals::Instance->context->GetWindow()->SetFrameDivisor(CVar_GetS32("g60FPS", 0) == 0 ? R_UPDATE_RATE : 1); - if (!audio.initialized) { audio.initialized = true; std::thread([]() { @@ -226,15 +224,45 @@ extern "C" void Graph_ProcessGfxCommands(Gfx* commands) { audio.cv_to_thread.notify_one(); std::vector> mtx_replacements; - if (CVar_GetS32("g60FPS", 0) != 0) { - int to = R_UPDATE_RATE; - for (int i = 1; i < to; i++) { - mtx_replacements.push_back(FrameInterpolation_Interpolate(i / (float)to)); + int target_fps = CVar_GetS32("gInterpolationFPS", 20); + static int last_fps; + static int last_update_rate; + static int time; + int fps = target_fps; + int original_fps = 60 / R_UPDATE_RATE; + + if (target_fps == 20 || original_fps > target_fps) { + fps = original_fps; + } + + if (last_fps != fps || last_update_rate != R_UPDATE_RATE) { + time = 0; + } + + // time_base = fps * original_fps (one second) + int next_original_frame = fps; + + while (time + original_fps <= next_original_frame) { + time += original_fps; + if (time != next_original_frame) { + mtx_replacements.push_back(FrameInterpolation_Interpolate((float)time / next_original_frame)); + } else { + mtx_replacements.emplace_back(); } } + time -= fps; + + OTRGlobals::Instance->context->GetWindow()->SetTargetFps(fps); + + int threshold = CVar_GetS32("gExtraLatencyThreshold", 80); + OTRGlobals::Instance->context->GetWindow()->SetMaximumFrameLatency(threshold > 0 && target_fps >= threshold ? 
2 : 1); + OTRGlobals::Instance->context->GetWindow()->RunCommands(commands, mtx_replacements); + last_fps = fps; + last_update_rate = R_UPDATE_RATE; + { std::unique_lock Lock(audio.mutex); while (audio.processing) { diff --git a/soh/soh/frame_interpolation.cpp b/soh/soh/frame_interpolation.cpp index bd85d41ce..44c255db1 100644 --- a/soh/soh/frame_interpolation.cpp +++ b/soh/soh/frame_interpolation.cpp @@ -451,7 +451,7 @@ void FrameInterpolation_StartRecord(void) { current_recording = {}; current_path.clear(); current_path.push_back(&current_recording.root_path); - if (CVar_GetS32("g60FPS", 0) != 0) { + if (CVar_GetS32("gInterpolationFPS", 20) != 20) { is_recording = true; } } diff --git a/soh/src/code/z_actor.c b/soh/src/code/z_actor.c index ba5d4dcd6..bdb2fe78c 100644 --- a/soh/src/code/z_actor.c +++ b/soh/src/code/z_actor.c @@ -2290,8 +2290,8 @@ void Actor_DrawFaroresWindPointer(GlobalContext* globalCtx) { ((void)0, gSaveContext.respawn[RESPAWN_MODE_TOP].pos.y) + yOffset, ((void)0, gSaveContext.respawn[RESPAWN_MODE_TOP].pos.z), 255, 255, 255, lightRadius); - CLOSE_DISPS(globalCtx->state.gfxCtx, "../z_actor.c", 5474); } + CLOSE_DISPS(globalCtx->state.gfxCtx, "../z_actor.c", 5474); } void func_80030488(GlobalContext* globalCtx) { diff --git a/soh/src/overlays/gamestates/ovl_file_choose/z_file_choose.c b/soh/src/overlays/gamestates/ovl_file_choose/z_file_choose.c index 570eedd1f..03a93e949 100644 --- a/soh/src/overlays/gamestates/ovl_file_choose/z_file_choose.c +++ b/soh/src/overlays/gamestates/ovl_file_choose/z_file_choose.c @@ -5,6 +5,8 @@ #include "textures/title_static/title_static.h" #include "textures/parameter_static/parameter_static.h" +#include "soh/frame_interpolation.h" + static s16 sUnused = 106; static s16 sScreenFillAlpha = 255; @@ -1136,6 +1138,8 @@ void FileChoose_ConfigModeDraw(GameState* thisx) { FileChoose_SetWindowVtx(&this->state); FileChoose_SetWindowContentVtx(&this->state); + FrameInterpolation_RecordOpenChild(this, this->configMode); + if 
((this->configMode != CM_NAME_ENTRY) && (this->configMode != CM_START_NAME_ENTRY)) { gDPPipeSync(POLY_OPA_DISP++); gDPSetCombineMode(POLY_OPA_DISP++, G_CC_MODULATEIA_PRIM, G_CC_MODULATEIA_PRIM); @@ -1227,6 +1231,8 @@ void FileChoose_ConfigModeDraw(GameState* thisx) { gDPPipeSync(POLY_OPA_DISP++); FileChoose_SetView(this, 0.0f, 0.0f, 64.0f); + FrameInterpolation_RecordCloseChild(); + CLOSE_DISPS(this->state.gfxCtx, "../z_file_choose.c", 2352); } @@ -1669,7 +1675,9 @@ void FileChoose_Main(GameState* thisx) { FileChoose_PulsateCursor(&this->state); gFileSelectUpdateFuncs[this->menuMode](&this->state); + FrameInterpolation_StartRecord(); gFileSelectDrawFuncs[this->menuMode](&this->state); + FrameInterpolation_StopRecord(); // do not draw controls text in the options menu if ((this->configMode <= CM_NAME_ENTRY_TO_MAIN) || (this->configMode >= CM_UNUSED_DELAY)) {