From 5a7d7433d16be7d548b0a32fb4ae6efdb8336b6c Mon Sep 17 00:00:00 2001
From: veclavtalica
Date: Sun, 9 Feb 2025 07:34:16 +0300
Subject: [PATCH] wip multithreaded texture load

Offload surface loading and transparency detection to the worker threads.
textures_load() now only reserves a cache slot and queues a request;
textures_update_atlas() waits for outstanding requests and then collects
the results on the main thread (loner upload or atlas assignment).

The cache slot is inserted while holding textures_load_mutex, because
workers index cache->hash under that lock and shput() may reallocate it.
The shared missing texture is packed into the atlas once; later entries
that resolve to it are skipped.

Model loading tracks in-flight requests with a dedicated counter, and the
wait loops sleep with SDL_Delay(1) instead of spinning.

---
 src/rendering/twn_gl_15_rendering.c |   2 +-
 src/rendering/twn_model.c           |  22 ++--
 src/twn_textures.c                  | 170 ++++++++++++++++++++++------
 src/twn_textures_c.h                |  12 +-
 src/twn_workers.c                   |   4 +
 5 files changed, 158 insertions(+), 52 deletions(-)

diff --git a/src/rendering/twn_gl_15_rendering.c b/src/rendering/twn_gl_15_rendering.c
index 2616628..972dcbf 100644
--- a/src/rendering/twn_gl_15_rendering.c
+++ b/src/rendering/twn_gl_15_rendering.c
@@ -175,7 +175,7 @@ static void finally_use_2d_pipeline(void) {
     pipeline_last_used = PIPELINE_2D;
 }
 
-
+/* TODO: ensure we minimize depth func switching to enable Hi-Z (hierarchical depth) optimizations */
 static void finally_use_texture_mode(TextureMode mode) {
     if (texture_mode_last_used == mode)
         return;
diff --git a/src/rendering/twn_model.c b/src/rendering/twn_model.c
index 65ec88f..1f27a0c 100644
--- a/src/rendering/twn_model.c
+++ b/src/rendering/twn_model.c
@@ -1,6 +1,7 @@
 #include "twn_draw_c.h"
 #include "twn_draw.h"
 #include "twn_workers_c.h"
+#include "twn_textures_c.h"
 
 #define FAST_OBJ_IMPLEMENTATION
 #define FAST_OBJ_REALLOC SDL_realloc
@@ -29,6 +30,7 @@ static struct ModelDrawCommand {
 /* deferred queue of model files to load from worker threads */
 static SDL_mutex *model_load_mutex;
 static char const **model_load_queue;
+static size_t model_load_queued;
 static bool model_load_initialized;
 
 /* use streaming via callbacks to reduce memory congestion */
@@ -55,10 +57,11 @@ static unsigned long model_load_callback_size(void *handle, void *udata) {
 
 /* TODO: is there a way to do this nicely while locking main thread? */
 /* sleeping over atomic counter might be good enough i guess */
+/* it's safe to access everything without lock after this returns true and no public api is possible to call */
 static bool model_load_workers_finished(void) {
     bool result;
     SDL_LockMutex(model_load_mutex);
-    result = arrlenu(model_load_queue) == 0;
+    result = model_load_queued == 0;
     SDL_UnlockMutex(model_load_mutex);
     return result;
 }
@@ -84,12 +87,12 @@ bool model_load_workers_thread(void) {
         .file_size = model_load_callback_size
     };
 
-    /* TODO: immediately create jobs for missing textures */
-    fastObjMesh *mesh = fast_obj_read_with_callbacks(load_request, &callbacks, NULL);
+    fastObjMesh *const mesh = fast_obj_read_with_callbacks(load_request, &callbacks, NULL);
 
     SDL_LockMutex(model_load_mutex);
     struct ModelCacheItem *item = shgetp(model_cache, load_request);
     item->value.mesh = mesh;
+    model_load_queued--;
     SDL_UnlockMutex(model_load_mutex);
 
     return true;
@@ -108,12 +111,14 @@ void draw_model(const char *model,
 
     struct ModelCacheItem const *item;
 
+    /* TODO: make it lockless */
     /* if model is missing, queue it up for loading */
     SDL_LockMutex(model_load_mutex);
     if (!(item = shgetp_null(model_cache, model))) {
         model = SDL_strdup(model);
         shput(model_cache, model, (struct ModelCacheItemValue){0});
         arrpush(model_load_queue, model);
+        model_load_queued++;
         SDL_SemPost(workers_job_semaphore);
     } else
         model = item->key;
@@ -130,14 +135,14 @@ void draw_model(const char *model,
 
 
 void finally_draw_models(void) {
-    while (!model_load_workers_finished()) {
-        (void)0;
-    }
+    while (!model_load_workers_finished())
+        SDL_Delay(1);
 
     /* TODO: have special path for them, preserving the buffers and potentially using instanced draw */
     for (int i = 0; i < arrlen(model_draw_commands); ++i) {
         struct ModelDrawCommand const *const command = &model_draw_commands[i];
         fastObjMesh const *const mesh = model_cache[shgeti(model_cache, command->model)].value.mesh;
+        SDL_assert(mesh);
         for (unsigned int g = 0; g < mesh->group_count; ++g) {
             fastObjGroup const *const group = &mesh->groups[g];
             unsigned int idx = 0;
@@ -208,9 +213,8 @@ void finally_draw_models(void) {
 
 /* drop model caches */
 void free_model_cache(void) {
-    while (!model_load_workers_finished()) {
-        (void)0;
-    }
+    while (!model_load_workers_finished())
+        SDL_Delay(1);
 
     for (size_t i = 0; i < shlenu(model_cache); ++i) {
         fast_obj_destroy(model_cache[i].value.mesh);
diff --git a/src/twn_textures.c b/src/twn_textures.c
index 6093d65..6a2f171 100644
--- a/src/twn_textures.c
+++ b/src/twn_textures.c
@@ -2,6 +2,7 @@
 #include "twn_util.h"
 #include "twn_util_c.h"
 #include "twn_engine_context_c.h"
+#include "twn_workers_c.h"
 
 #include
 #include
@@ -19,6 +20,18 @@ typedef struct {
 } TextureLoadingContext;
 
 
+static SDL_mutex *textures_load_mutex;
+static struct TextureLoadRequest {
+    Texture result; /* will be copied into cache when it's time, freeing us from locking */
+    size_t index; /* index into cache->hash to fill */
+    /* 0 = awaits processing */
+    /* 1 = in processing */
+    /* 2 = success */
+    /* 3 = failure */
+    uint8_t status;
+} *texture_load_queue;
+
+
 static int load_read_callback(void *user, char *data, int size) {
     TextureLoadingContext *context = user;
     int read = (int)SDL_RWread(context->rwops, data, 1, size);
@@ -42,7 +55,7 @@ static int load_eof_callback(void *user) {
 
 
 static SDL_Surface *missing_texture_surface;
-static uint16_t missing_texture_id;
+static uint16_t missing_texture_id = TEXTURE_KEY_INVALID.id;
 
 static SDL_Surface *gen_missing_texture_surface(void) {
     Uint32 rmask, gmask, bmask;
@@ -57,6 +70,8 @@ static SDL_Surface *gen_missing_texture_surface(void) {
     bmask = 0x00ff0000;
 #endif
 
+    SDL_LockMutex(textures_load_mutex);
+
     if (!missing_texture_surface) {
         uint8_t *data = SDL_malloc(64 * 64 * 3);
         for (int y = 0; y < 64; ++y) {
@@ -74,6 +89,8 @@ static SDL_Surface *gen_missing_texture_surface(void) {
                                                            rmask, gmask, bmask, 0);
     }
 
+    SDL_UnlockMutex(textures_load_mutex);
+
     return missing_texture_surface;
 }
 
@@ -230,10 +247,17 @@ static void recreate_current_atlas_texture(TextureCache *cache) {
 /* uses the textures currently in the cache to create an array of stbrp_rects */
 static stbrp_rect *create_rects_from_cache(TextureCache *cache) {
     stbrp_rect *rects = NULL;
+    bool missing_texture_used = false;
     for (size_t i = 0; i < shlenu(cache->hash); ++i) {
         if (cache->hash[i].value.loner_texture != 0)
             continue;
 
+        /* only put it once: the first missing entry gets a rect, later duplicates are skipped */
+        if (cache->hash[i].value.data == missing_texture_surface) {
+            if (missing_texture_used) continue;
+            missing_texture_used = true;
+        }
+
         const SDL_Surface *surface_data = cache->hash[i].value.data;
         stbrp_rect new_rect = {
             .w = surface_data->w,
@@ -318,6 +342,7 @@ void textures_cache_init(TextureCache *cache, SDL_Window *window) {
     sh_new_arena(cache->hash);
 
     cache->node_buffer = SDL_malloc(ctx.texture_atlas_size * sizeof *cache->node_buffer);
+    textures_load_mutex = SDL_CreateMutex();
 
     add_new_atlas(cache);
 }
@@ -346,6 +371,9 @@ void textures_cache_deinit(TextureCache *cache) {
     }
     shfree(cache->hash);
 
+    SDL_DestroyMutex(textures_load_mutex);
+    arrfree(texture_load_queue);
+
     SDL_free(cache->node_buffer);
 }
 
@@ -377,49 +405,90 @@ static enum TextureMode infer_texture_mode(SDL_Surface *surface) {
     return result;
 }
 
+/* offloads surface load and transparency detection from main thread */
+bool textures_load_workers_thread(void) {
+    /* try grabbing some work */
+    ssize_t texture_id = -1;
+    ssize_t queue_index = -1;
+    char *path = NULL; /* copy of a key, as it's not stable in arena */
 
-static TextureKey textures_load(TextureCache *cache, const char *path) {
-    /* no need to do anything if it was loaded already */
-    const ptrdiff_t i = shgeti(cache->hash, path);
-    if (i >= 0)
-        return (TextureKey){ (uint16_t)i };
+    SDL_LockMutex(textures_load_mutex);
+    for (size_t i = 0; i < arrlenu(texture_load_queue); ++i) {
+        if (texture_load_queue[i].status == 0) {
+            texture_id = texture_load_queue[i].index;
+            path = SDL_strdup(ctx.texture_cache.hash[texture_id].key);
+            texture_load_queue[i].status = 1; /* mark as in process */
+            queue_index = i;
+            break;
+        }
+    }
+    SDL_UnlockMutex(textures_load_mutex);
+    /* nothing to do, bail */
+    if (queue_index == -1)
+        return false;
 
-    SDL_Surface *surface = textures_load_surface(path);
-    if (surface == missing_texture_surface && missing_texture_id != 0)
-        return (TextureKey){ missing_texture_id };
+    SDL_assert(texture_id != -1 && queue_index != -1);
 
-    Texture new_texture = {
+    SDL_Surface *const surface = textures_load_surface(path);
+    SDL_free(path);
+
+    Texture const response = {
         .data = surface,
         .mode = infer_texture_mode(surface),
     };
 
-    /* it's a "loner texture," it doesn't fit in an atlas so it's not in one */
-    if (surface->w >= (int)ctx.texture_atlas_size || surface->h >= (int)ctx.texture_atlas_size) {
-        if (ctx.game.debug) {
-            if (surface->w > 2048 || surface->h > 2048)
-                log_warn("Unportable texture dimensions for %s, use 2048x2048 at max", path);
-            if (!is_power_of_two(surface->w) || !is_power_of_two(surface->h))
-                log_warn("Unportable texture dimensions for %s, should be powers of 2", path);
+    SDL_LockMutex(textures_load_mutex);
+
+    texture_load_queue[queue_index].result = response;
+    texture_load_queue[queue_index].status = 2; /* mark success */
+
+    /* reuse this id in the future, allowing for draw call merging */
+    if (surface == missing_texture_surface && missing_texture_id == TEXTURE_KEY_INVALID.id)
+        missing_texture_id = (uint16_t)texture_id;
+
+    SDL_UnlockMutex(textures_load_mutex);
+
+    return true;
+}
+
+
+static TextureKey textures_load(TextureCache *cache, const char *path) {
+    /* at this point we assume that texture isn't loaded */
+
+    /* place a dummy for future lookups to know it will be loaded, as well as a place for worker to fill in */
+    /* workers index cache->hash under this lock and shput may reallocate it, so insert under the lock too */
+    SDL_LockMutex(textures_load_mutex);
+    shput(cache->hash, path, (Texture){0});
+
+    /* append a new request, use stable indices */
+    struct TextureLoadRequest const request = {
+        .index = shlenu(cache->hash) - 1,
+    };
+    arrpush(texture_load_queue, request);
+    SDL_UnlockMutex(textures_load_mutex);
+
+    /* signal work to do */
+    SDL_SemPost(workers_job_semaphore);
+
+    cache->is_dirty = true;
+
+    /* report the newly created slot */
+    return (TextureKey){ (uint16_t)shlenu(cache->hash) - 1 };
+}
+
+
+/* it's safe to access everything without lock after this returns true and no public api is possible to call */
+static bool textures_load_workers_finished(void) {
+    bool result = true;
+    SDL_LockMutex(textures_load_mutex);
+    for (size_t i = 0; i < arrlenu(texture_load_queue); ++i) {
+        if (texture_load_queue[i].status == 0 || texture_load_queue[i].status == 1) {
+            result = false;
+            break;
         }
-        new_texture.loner_texture = create_gpu_texture(TEXTURE_FILTER_NEAREAST, true);
-        upload_texture_from_surface(new_texture.loner_texture, surface);
-        new_texture.srcrect = (Rect) { .w = (float)surface->w, .h = (float)surface->h };
-
-    } else {
-        /* will be fully populated as the atlas updates */
-        new_texture.atlas_index = cache->atlas_index;
-        cache->is_dirty = true;
     }
-
-    shput(cache->hash, path, new_texture);
-
-    uint16_t const id = (uint16_t)shlenu(cache->hash) - 1;
-
-    /* reuse this id for every later missing texture */
-    if (surface == missing_texture_surface)
-        missing_texture_id = id;
-
-    return (TextureKey){ id };
+    SDL_UnlockMutex(textures_load_mutex);
+    return result;
 }
 
 
@@ -427,6 +496,37 @@ void textures_update_atlas(TextureCache *cache) {
     if (!cache->is_dirty)
         return;
 
+    while (!textures_load_workers_finished())
+        SDL_Delay(1);
+
+    /* collect results */
+    for (size_t i = 0; i < arrlenu(texture_load_queue); ++i) {
+        SDL_assert(texture_load_queue[i].status == 2);
+
+        Texture response = texture_load_queue[i].result;
+
+        /* it's a "loner texture," it doesn't fit in an atlas so it's not in one */
+        if (response.data->w >= (int)ctx.texture_atlas_size || response.data->h >= (int)ctx.texture_atlas_size) {
+            if (ctx.game.debug) {
+                if (response.data->w > 2048 || response.data->h > 2048)
+                    log_warn("Unportable texture dimensions for %s, use 2048x2048 at max", cache->hash[texture_load_queue[i].index].key);
+                if (!is_power_of_two(response.data->w) || !is_power_of_two(response.data->h))
+                    log_warn("Unportable texture dimensions for %s, should be powers of 2", cache->hash[texture_load_queue[i].index].key);
+            }
+            response.loner_texture = create_gpu_texture(TEXTURE_FILTER_NEAREAST, true);
+            upload_texture_from_surface(response.loner_texture, response.data);
+            response.srcrect = (Rect) { .w = (float)response.data->w, .h = (float)response.data->h };
+
+        } else {
+            /* will be fully populated as the atlas updates */
+            response.atlas_index = cache->atlas_index;
+        }
+
+        cache->hash[texture_load_queue[i].index].value = response;
+    }
+
+    arrsetlen(texture_load_queue, 0);
+
     /* this function makes a lot more sense if you read stb_rect_pack.h */
     stbrp_context pack_ctx; /* target info */
     stbrp_init_target(&pack_ctx,
diff --git a/src/twn_textures_c.h b/src/twn_textures_c.h
index 8faa607..14e95fb 100644
--- a/src/twn_textures_c.h
+++ b/src/twn_textures_c.h
@@ -46,7 +46,7 @@ typedef struct TextureCache {
     bool is_dirty; /* current atlas needs to be recreated */
 } TextureCache;
 
-/* type safe structure for persistent texture handles */
+/* type safe structure for frame persistent texture handles */
 typedef struct TextureKey { uint16_t id; } TextureKey;
 
 /* tests whether given key structure corresponds to any texture */
@@ -56,17 +56,12 @@ typedef struct TextureKey { uint16_t id; } TextureKey;
 void textures_cache_init(struct TextureCache *cache, SDL_Window *window);
 void textures_cache_deinit(struct TextureCache *cache);
 
-/* loads an image if it isn't in the cache, otherwise a no-op. */
-/* can be called from anywhere at any time after init, useful if you want to */
-/* preload textures you know will definitely be used */
-// void textures_load(struct texture_cache *cache, const char *path);
-
 /* repacks the current texture atlas based on the texture cache if needed */
 /* any previously returned srcrect results are invalidated after that */
 /* call it every time before rendering */
 void textures_update_atlas(TextureCache *cache);
 
-/* returns a persistent handle to some texture in cache, loading it if needed */
+/* returns a frame persistent handle to some texture in cache, loading it if needed */
 /* check the result with m_texture_key_is_valid() */
 TextureKey textures_get_key(TextureCache *cache, const char *path);
 
@@ -96,4 +91,7 @@ void textures_reset_state(void);
 /* warn: surface->pixels must be freed along side the surface itself */
 SDL_Surface *textures_load_surface(const char *path);
 
+/* note: will only take effect after `textures_update_atlas` */
+bool textures_load_workers_thread(void);
+
 #endif
diff --git a/src/twn_workers.c b/src/twn_workers.c
index e4a1a18..2835be3 100644
--- a/src/twn_workers.c
+++ b/src/twn_workers.c
@@ -27,8 +27,12 @@ static int worker_thread(void *udata) {
         if (SDL_SemWaitTimeout(workers_job_semaphore, 100) == SDL_MUTEX_TIMEDOUT)
             continue;
 
+        /* process models, which will trigger texture loads */
         if (model_load_workers_thread())
             continue;
+
+        if (textures_load_workers_thread())
+            continue;
     }
 
     /* let the main thread collect it */