From c5e84c17a5401dc8226005d015268c905ca62877 Mon Sep 17 00:00:00 2001 From: David Chavez Date: Wed, 6 Jul 2022 01:29:57 +0200 Subject: [PATCH] Use unordered maps in some hot paths (#566) * Use unordered maps in some hot paths * Address PR comments --- libultraship/libultraship/Archive.h | 3 +- .../Lib/Fast3D/gfx_direct3d11.cpp | 7 ++- .../libultraship/Lib/Fast3D/gfx_opengl.cpp | 5 +- .../libultraship/Lib/Fast3D/gfx_pc.cpp | 49 ++++++------------- .../Lib/Fast3D/gfx_rendering_api.h | 14 +++++- libultraship/libultraship/ResourceMgr.h | 6 +-- 6 files changed, 41 insertions(+), 43 deletions(-) diff --git a/libultraship/libultraship/Archive.h b/libultraship/libultraship/Archive.h index 6294606ac..0eeaa4021 100644 --- a/libultraship/libultraship/Archive.h +++ b/libultraship/libultraship/Archive.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include "Resource.h" @@ -45,7 +46,7 @@ namespace Ship std::string PatchesPath; std::map mpqHandles; std::vector addedFiles; - std::map hashes; + std::unordered_map hashes; HANDLE mainMPQ; bool LoadMainMPQ(bool enableWriting, bool genCRCMap); diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp index 3d1fd3546..b8ee8291d 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_direct3d11.cpp @@ -4,6 +4,9 @@ #include #include +#include +#include + #include #include #include @@ -902,7 +905,7 @@ FilteringMode gfx_d3d11_get_texture_filter(void) { return d3d.current_filter_mode; } -std::map, uint16_t> gfx_d3d11_get_pixel_depth(int fb_id, const std::set>& coordinates) { +std::unordered_map, uint16_t, hash_pair_ff> gfx_d3d11_get_pixel_depth(int fb_id, const std::set>& coordinates) { Framebuffer& fb = d3d.framebuffers[fb_id]; TextureData& td = d3d.textures[fb.texture_id]; @@ -990,7 +993,7 @@ std::map, uint16_t> gfx_d3d11_get_pixel_depth(int fb_id, 
d3d.context->CopyResource(d3d.depth_value_output_buffer_copy.Get(), d3d.depth_value_output_buffer.Get()); ThrowIfFailed(d3d.context->Map(d3d.depth_value_output_buffer_copy.Get(), 0, D3D11_MAP_READ, 0, &ms)); - std::map, uint16_t> res; + std::unordered_map, uint16_t, hash_pair_ff> res; { size_t i = 0; for (const auto& coord : coordinates) { diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_opengl.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_opengl.cpp index c7b71962c..45118b2b3 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_opengl.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_opengl.cpp @@ -6,6 +6,7 @@ #include #include +#include #ifndef _LANGUAGE_C #define _LANGUAGE_C @@ -872,8 +873,8 @@ void gfx_opengl_select_texture_fb(int fb_id) { glBindTexture(GL_TEXTURE_2D, framebuffers[fb_id].clrbuf); } -static std::map, uint16_t> gfx_opengl_get_pixel_depth(int fb_id, const std::set>& coordinates) { - std::map, uint16_t> res; +static std::unordered_map, uint16_t, hash_pair_ff> gfx_opengl_get_pixel_depth(int fb_id, const std::set>& coordinates) { + std::unordered_map, uint16_t, hash_pair_ff> res; Framebuffer& fb = framebuffers[fb_id]; diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp b/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp index 89019aa97..785b7b898 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp +++ b/libultraship/libultraship/Lib/Fast3D/gfx_pc.cpp @@ -216,7 +216,7 @@ static map::iterator active_fb; static map framebuffers; static set> get_pixel_depth_pending; -static map, uint16_t> get_pixel_depth_cached; +static unordered_map, uint16_t, hash_pair_ff> get_pixel_depth_cached; #ifdef _WIN32 // TODO: Properly implement for MSVC @@ -901,6 +901,7 @@ static void calculate_normal_dir(const Light_t *light, float coeffs[3]) { light->dir[1] / 127.0f, light->dir[2] / 127.0f }; + gfx_transposed_matrix_mul(coeffs, light_dir, rsp.modelview_matrix_stack[rsp.modelview_matrix_stack_size - 1]); gfx_normalize_vector(coeffs); } @@ -1003,16 
+1004,6 @@ static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *verti const Vtx_tn *vn = &vertices[i].n; struct LoadedVertex *d = &rsp.loaded_vertices[dest_index]; - if (markerOn) - { - int bp = 0; - } - - if ((uintptr_t)vertices == 0x14913ec0) - { - int bp = 0; - } - if (v == NULL) return; @@ -1021,11 +1012,6 @@ static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *verti float z = v->ob[0] * rsp.MP_matrix[0][2] + v->ob[1] * rsp.MP_matrix[1][2] + v->ob[2] * rsp.MP_matrix[2][2] + rsp.MP_matrix[3][2]; float w = v->ob[0] * rsp.MP_matrix[0][3] + v->ob[1] * rsp.MP_matrix[1][3] + v->ob[2] * rsp.MP_matrix[2][3] + rsp.MP_matrix[3][3]; - if (markerOn) - { - int bp = 0; - } - x = gfx_adjust_x_for_aspect_ratio(x); short U = v->tc[0] * rsp.texture_scaling_factor.s >> 16; @@ -1077,10 +1063,8 @@ static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *verti dotx /= 127.0f; doty /= 127.0f; - if (dotx < -1.0f) dotx = -1.0f; - if (dotx > 1.0f) dotx = 1.0f; - if (doty < -1.0f) doty = -1.0f; - if (doty > 1.0f) doty = 1.0f; + dotx = std::clamp(dotx, -1.0f, 1.0f); + doty = std::clamp(doty, -1.0f, 1.0f); if (rsp.geometry_mode & G_TEXTURE_GEN_LINEAR) { // Not sure exactly what formula we should use to get accurate values @@ -1088,8 +1072,8 @@ static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *verti doty = (2.906921f * doty * doty + 1.36114f) * doty; dotx = (dotx + 1.0f) / 4.0f; doty = (doty + 1.0f) / 4.0f;*/ - dotx = acosf(-dotx) /*/ (3.14159265f)*/ / 4.0f; - doty = acosf(-doty) /*/ (3.14159265f)*/ / 4.0f; + dotx = acosf(-dotx) /* / M_PI */ / 4.0f; + doty = acosf(-doty) /* / M_PI */ / 4.0f; } else { dotx = (dotx + 1.0f) / 4.0f; @@ -1110,12 +1094,12 @@ static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *verti // trivial clip rejection d->clip_rej = 0; - if (x < -w) d->clip_rej |= 1; - if (x > w) d->clip_rej |= 2; - if (y < -w) d->clip_rej |= 4; - if (y > w) d->clip_rej |= 8; - //if (z < -w) 
d->clip_rej |= 16; - if (z > w) d->clip_rej |= 32; + if (x < -w) d->clip_rej |= 1; // CLIP_LEFT + if (x > w) d->clip_rej |= 2; // CLIP_RIGHT + if (y < -w) d->clip_rej |= 4; // CLIP_BOTTOM + if (y > w) d->clip_rej |= 8; // CLIP_TOP + // if (z < -w) d->clip_rej |= 16; // CLIP_NEAR + if (z > w) d->clip_rej |= 32; // CLIP_FAR d->x = x; d->y = y; @@ -1129,13 +1113,10 @@ static void gfx_sp_vertex(size_t n_vertices, size_t dest_index, const Vtx *verti } float winv = 1.0f / w; - if (winv < 0.0f) { - winv = 32767.0f; - } + if (winv < 0.0f) winv = std::numeric_limits::max(); float fog_z = z * winv * rsp.fog_mul + rsp.fog_offset; - if (fog_z < 0) fog_z = 0; - if (fog_z > 255) fog_z = 255; + fog_z = std::clamp(fog_z, 0.0f, 255.0f); d->color.a = fog_z; // Use alpha variable to store fog factor } else { d->color.a = v->cn[3]; @@ -2881,7 +2862,7 @@ uint16_t gfx_get_pixel_depth(float x, float y) { get_pixel_depth_pending.emplace(x, y); - map, uint16_t> res = gfx_rapi->get_pixel_depth(game_renders_to_framebuffer ? game_framebuffer : 0, get_pixel_depth_pending); + unordered_map, uint16_t, hash_pair_ff> res = gfx_rapi->get_pixel_depth(game_renders_to_framebuffer ? game_framebuffer : 0, get_pixel_depth_pending); get_pixel_depth_cached.merge(res); get_pixel_depth_pending.clear(); diff --git a/libultraship/libultraship/Lib/Fast3D/gfx_rendering_api.h b/libultraship/libultraship/Lib/Fast3D/gfx_rendering_api.h index 6318be492..02bb5afbc 100644 --- a/libultraship/libultraship/Lib/Fast3D/gfx_rendering_api.h +++ b/libultraship/libultraship/Lib/Fast3D/gfx_rendering_api.h @@ -6,6 +6,7 @@ #include #include +#include #include struct ShaderProgram; @@ -21,6 +22,17 @@ enum FilteringMode { NONE }; +// A hash function used to hash a: pair +struct hash_pair_ff { + size_t operator()(const std::pair &p ) const { + auto hash1 = std::hash{}(p.first); + auto hash2 = std::hash{}(p.second); + + // If hash1 == hash2, their XOR is zero. + return (hash1 != hash2) ? 
hash1 ^ hash2 : hash1; + } +}; + struct GfxRenderingAPI { struct GfxClipParameters (*get_clip_parameters)(void); void (*unload_shader)(struct ShaderProgram *old_prg); @@ -48,7 +60,7 @@ struct GfxRenderingAPI { void (*start_draw_to_framebuffer)(int fb_id, float noise_scale); void (*clear_framebuffer)(void); void (*resolve_msaa_color_buffer)(int fb_id_target, int fb_id_source); - std::map, uint16_t> (*get_pixel_depth)(int fb_id, const std::set>& coordinates); + std::unordered_map, uint16_t, hash_pair_ff> (*get_pixel_depth)(int fb_id, const std::set>& coordinates); void *(*get_framebuffer_texture_id)(int fb_id); void (*select_texture_fb)(int fb_id); void (*delete_texture)(uint32_t texID); diff --git a/libultraship/libultraship/ResourceMgr.h b/libultraship/libultraship/ResourceMgr.h index 7a1cb5425..5604552d5 100644 --- a/libultraship/libultraship/ResourceMgr.h +++ b/libultraship/libultraship/ResourceMgr.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -52,8 +52,8 @@ namespace Ship private: std::weak_ptr Context; volatile bool bIsRunning; - std::map> FileCache; - std::map, std::less<>> ResourceCache; + std::unordered_map> FileCache; + std::unordered_map> ResourceCache; std::queue> FileLoadQueue; std::queue> ResourceLoadQueue; std::shared_ptr OTR;