wip multithreaded texture load

This commit is contained in:
veclavtalica 2025-02-09 07:34:16 +03:00
parent 037548436d
commit 5a7d7433d1
5 changed files with 158 additions and 52 deletions

View File

@ -175,7 +175,7 @@ static void finally_use_2d_pipeline(void) {
pipeline_last_used = PIPELINE_2D;
}
/* TODO: ensure we minimize depth func switching to enable Hi-Z (hierarchical depth) optimizations */
static void finally_use_texture_mode(TextureMode mode) {
if (texture_mode_last_used == mode)
return;

View File

@ -1,6 +1,7 @@
#include "twn_draw_c.h"
#include "twn_draw.h"
#include "twn_workers_c.h"
#include "twn_textures_c.h"
#define FAST_OBJ_IMPLEMENTATION
#define FAST_OBJ_REALLOC SDL_realloc
@ -29,6 +30,7 @@ static struct ModelDrawCommand {
/* deferred queue of model files to load from worker threads */
static SDL_mutex *model_load_mutex;
static char const **model_load_queue;
static size_t model_load_queued;
static bool model_load_initialized;
/* use streaming via callbacks to reduce memory congestion */
@ -55,10 +57,11 @@ static unsigned long model_load_callback_size(void *handle, void *udata) {
/* TODO: is there a way to do this nicely while locking the main thread? */
/* sleeping over an atomic counter might be good enough */
/* it's safe to access everything without a lock after this returns true, as no public API can be called at that point */
static bool model_load_workers_finished(void) {
bool result;
SDL_LockMutex(model_load_mutex);
result = arrlenu(model_load_queue) == 0;
result = model_load_queued == 0;
SDL_UnlockMutex(model_load_mutex);
return result;
}
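The TODO above floats "sleeping over an atomic counter" as an alternative to taking the mutex just to read a counter. A minimal sketch of that idea with SDL2's SDL_atomic.h, assuming a hypothetical model_load_pending counter (not part of this commit):

static SDL_atomic_t model_load_pending;

/* bump when a model is queued in draw_model(), drop when a worker finishes one */
static void model_load_mark_queued(void)   { SDL_AtomicIncRef(&model_load_pending); }
static void model_load_mark_finished(void) { SDL_AtomicDecRef(&model_load_pending); }

static bool model_load_workers_finished_atomic(void) {
    /* a single atomic read is enough for the main thread's "are we done yet" poll */
    return SDL_AtomicGet(&model_load_pending) == 0;
}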
@ -84,12 +87,12 @@ bool model_load_workers_thread(void) {
.file_size = model_load_callback_size
};
/* TODO: immediately create jobs for missing textures */
fastObjMesh *mesh = fast_obj_read_with_callbacks(load_request, &callbacks, NULL);
fastObjMesh *const mesh = fast_obj_read_with_callbacks(load_request, &callbacks, NULL);
SDL_LockMutex(model_load_mutex);
struct ModelCacheItem *item = shgetp(model_cache, load_request);
item->value.mesh = mesh;
model_load_queued--;
SDL_UnlockMutex(model_load_mutex);
return true;
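The callbacks wired up above stream the .obj through fast_obj instead of reading the whole file up front. A hedged sketch of what the remaining PhysFS-backed callbacks could look like, assuming the engine routes assets through PhysFS as the texture code does; the fastObjCallbacks field names follow upstream fast_obj, and the helper names here are illustrative only:

static void *model_load_callback_open(const char *path, void *udata) {
    (void)udata;
    return PHYSFS_openRead(path);
}

static void model_load_callback_close(void *handle, void *udata) {
    (void)udata;
    PHYSFS_close(handle);
}

static size_t model_load_callback_read(void *handle, void *dst, size_t bytes, void *udata) {
    (void)udata;
    PHYSFS_sint64 const read = PHYSFS_readBytes(handle, dst, bytes);
    return read < 0 ? 0 : (size_t)read;
}

/* hooked into the same struct as .file_size above:
   .file_open = model_load_callback_open, .file_close = model_load_callback_close,
   .file_read = model_load_callback_read */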
@ -108,12 +111,14 @@ void draw_model(const char *model,
struct ModelCacheItem const *item;
/* TODO: make it lockless */
/* if model is missing, queue it up for loading */
SDL_LockMutex(model_load_mutex);
if (!(item = shgetp_null(model_cache, model))) {
model = SDL_strdup(model);
shput(model_cache, model, (struct ModelCacheItemValue){0});
arrpush(model_load_queue, model);
model_load_queued++;
SDL_SemPost(workers_job_semaphore);
} else
model = item->key;
@ -130,14 +135,14 @@ void draw_model(const char *model,
void finally_draw_models(void) {
while (!model_load_workers_finished()) {
(void)0;
}
while (!model_load_workers_finished())
SDL_Delay(1);
/* TODO: have special path for them, preserving the buffers and potentially using instanced draw */
for (int i = 0; i < arrlen(model_draw_commands); ++i) {
struct ModelDrawCommand const *const command = &model_draw_commands[i];
fastObjMesh const *const mesh = model_cache[shgeti(model_cache, command->model)].value.mesh;
SDL_assert(mesh);
for (unsigned int g = 0; g < mesh->group_count; ++g) {
fastObjGroup const *const group = &mesh->groups[g];
unsigned int idx = 0;
@ -208,9 +213,8 @@ void finally_draw_models(void) {
/* drop model caches */
void free_model_cache(void) {
while (!model_load_workers_finished()) {
(void)0;
}
while (!model_load_workers_finished())
SDL_Delay(1);
for (size_t i = 0; i < shlenu(model_cache); ++i) {
fast_obj_destroy(model_cache[i].value.mesh);

View File

@ -2,6 +2,7 @@
#include "twn_util.h"
#include "twn_util_c.h"
#include "twn_engine_context_c.h"
#include "twn_workers_c.h"
#include <SDL2/SDL.h>
#include <physfs.h>
@ -19,6 +20,18 @@ typedef struct {
} TextureLoadingContext;
static SDL_mutex *textures_load_mutex;
static struct TextureLoadRequest {
Texture result; /* will be copied into cache when it's time, freeing us from locking */
size_t index; /* index into cache->hash to fill */
/* 0 = awaits processing */
/* 1 = in processing */
/* 2 = success */
/* 3 = failure */
uint8_t status;
} *texture_load_queue;
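The status byte above is a small state machine driven from both sides of the mutex. Purely as documentation, the same codes spelled out as an enum (a sketch; the commit keeps a plain uint8_t):

enum TextureLoadStatus {
    TEXTURE_LOAD_QUEUED    = 0, /* awaits processing */
    TEXTURE_LOAD_IN_FLIGHT = 1, /* a worker has claimed it */
    TEXTURE_LOAD_SUCCEEDED = 2, /* result is ready for the main thread to collect */
    TEXTURE_LOAD_FAILED    = 3, /* surface could not be loaded */
};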
static int load_read_callback(void *user, char *data, int size) {
TextureLoadingContext *context = user;
int read = (int)SDL_RWread(context->rwops, data, 1, size);
@ -42,7 +55,7 @@ static int load_eof_callback(void *user) {
static SDL_Surface *missing_texture_surface;
static uint16_t missing_texture_id;
static uint16_t missing_texture_id = TEXTURE_KEY_INVALID.id;
static SDL_Surface *gen_missing_texture_surface(void) {
Uint32 rmask, gmask, bmask;
@ -57,6 +70,8 @@ static SDL_Surface *gen_missing_texture_surface(void) {
bmask = 0x00ff0000;
#endif
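/* worker threads can race to create the shared missing-texture surface, hence the new lock around its lazy init */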
SDL_LockMutex(textures_load_mutex);
if (!missing_texture_surface) {
uint8_t *data = SDL_malloc(64 * 64 * 3);
for (int y = 0; y < 64; ++y) {
@ -74,6 +89,8 @@ static SDL_Surface *gen_missing_texture_surface(void) {
rmask, gmask, bmask, 0);
}
SDL_UnlockMutex(textures_load_mutex);
return missing_texture_surface;
}
@ -230,10 +247,17 @@ static void recreate_current_atlas_texture(TextureCache *cache) {
/* uses the textures currently in the cache to create an array of stbrp_rects */
static stbrp_rect *create_rects_from_cache(TextureCache *cache) {
stbrp_rect *rects = NULL;
bool missing_texture_used = false;
for (size_t i = 0; i < shlenu(cache->hash); ++i) {
if (cache->hash[i].value.loner_texture != 0)
continue;
/* the shared missing-texture surface only needs to be packed once */
if (!missing_texture_used && cache->hash[i].value.data == missing_texture_surface) {
missing_texture_used = true;
continue;
}
const SDL_Surface *surface_data = cache->hash[i].value.data;
stbrp_rect new_rect = {
.w = surface_data->w,
@ -318,6 +342,7 @@ void textures_cache_init(TextureCache *cache, SDL_Window *window) {
sh_new_arena(cache->hash);
cache->node_buffer = SDL_malloc(ctx.texture_atlas_size * sizeof *cache->node_buffer);
textures_load_mutex = SDL_CreateMutex();
add_new_atlas(cache);
}
@ -346,6 +371,9 @@ void textures_cache_deinit(TextureCache *cache) {
}
shfree(cache->hash);
SDL_DestroyMutex(textures_load_mutex);
arrfree(texture_load_queue);
SDL_free(cache->node_buffer);
}
@ -377,49 +405,90 @@ static enum TextureMode infer_texture_mode(SDL_Surface *surface) {
return result;
}
/* offloads surface loading and transparency detection from the main thread */
bool textures_load_workers_thread(void) {
/* try grabbing some work */
ssize_t texture_id = -1;
ssize_t queue_index = -1;
char *path = NULL; /* copy of a key, as it's not stable in arena */
static TextureKey textures_load(TextureCache *cache, const char *path) {
/* no need to do anything if it was loaded already */
const ptrdiff_t i = shgeti(cache->hash, path);
if (i >= 0)
return (TextureKey){ (uint16_t)i };
SDL_LockMutex(textures_load_mutex);
for (size_t i = 0; i < arrlenu(texture_load_queue); ++i) {
if (texture_load_queue[i].status == 0) {
texture_id = texture_load_queue[i].index;
path = SDL_strdup(ctx.texture_cache.hash[texture_id].key);
texture_load_queue[i].status = 1; /* mark as in process */
queue_index = i;
break;
}
}
SDL_UnlockMutex(textures_load_mutex);
/* nothing to do, bail */
if (queue_index == -1)
return false;
SDL_Surface *surface = textures_load_surface(path);
if (surface == missing_texture_surface && missing_texture_id != 0)
return (TextureKey){ missing_texture_id };
SDL_assert(texture_id != -1 && queue_index != -1);
Texture new_texture = {
SDL_Surface *const surface = textures_load_surface(path);
SDL_free(path);
Texture const response = {
.data = surface,
.mode = infer_texture_mode(surface),
};
/* it's a "loner texture," it doesn't fit in an atlas so it's not in one */
if (surface->w >= (int)ctx.texture_atlas_size || surface->h >= (int)ctx.texture_atlas_size) {
if (ctx.game.debug) {
if (surface->w > 2048 || surface->h > 2048)
log_warn("Unportable texture dimensions for %s, use 2048x2048 at max", path);
if (!is_power_of_two(surface->w) || !is_power_of_two(surface->h))
log_warn("Unportable texture dimensions for %s, should be powers of 2", path);
SDL_LockMutex(textures_load_mutex);
texture_load_queue[queue_index].result = response;
texture_load_queue[queue_index].status = 2; /* mark success */
/* reuse this id in the future, allowing for draw call merging */
if (surface == missing_texture_surface && missing_texture_id == TEXTURE_KEY_INVALID.id)
missing_texture_id = (uint16_t)texture_id;
SDL_UnlockMutex(textures_load_mutex);
return true;
}
static TextureKey textures_load(TextureCache *cache, const char *path) {
/* at this point we assume the texture isn't loaded yet */
/* place a dummy entry so future lookups know it is being loaded, */
/* as well as a slot for the worker to fill in */
shput(cache->hash, path, (Texture){0});
/* append a new request; indices into cache->hash are stable, so the worker can find its slot later */
struct TextureLoadRequest const request = {
.index = shlenu(cache->hash) - 1,
};
SDL_LockMutex(textures_load_mutex);
arrpush(texture_load_queue, request);
SDL_UnlockMutex(textures_load_mutex);
/* signal work to do */
SDL_SemPost(workers_job_semaphore);
cache->is_dirty = true;
/* report the newly created slot */
return (TextureKey){ (uint16_t)shlenu(cache->hash) - 1 };
}
/* it's safe to access everything without a lock after this returns true, as no public API can be called at that point */
static bool textures_load_workers_finished(void) {
bool result = true;
SDL_LockMutex(textures_load_mutex);
for (size_t i = 0; i < arrlenu(texture_load_queue); ++i) {
if (texture_load_queue[i].status == 0 || texture_load_queue[i].status == 1) {
result = false;
break;
}
new_texture.loner_texture = create_gpu_texture(TEXTURE_FILTER_NEAREAST, true);
upload_texture_from_surface(new_texture.loner_texture, surface);
new_texture.srcrect = (Rect) { .w = (float)surface->w, .h = (float)surface->h };
} else {
/* will be fully populated as the atlas updates */
new_texture.atlas_index = cache->atlas_index;
cache->is_dirty = true;
}
shput(cache->hash, path, new_texture);
uint16_t const id = (uint16_t)shlenu(cache->hash) - 1;
/* reuse this id for every later missing texture */
if (surface == missing_texture_surface)
missing_texture_id = id;
return (TextureKey){ id };
SDL_UnlockMutex(textures_load_mutex);
return result;
}
@ -427,6 +496,37 @@ void textures_update_atlas(TextureCache *cache) {
if (!cache->is_dirty)
return;
while (!textures_load_workers_finished())
SDL_Delay(1);
/* collect results */
for (size_t i = 0; i < arrlenu(texture_load_queue); ++i) {
SDL_assert(texture_load_queue[i].status == 2);
Texture response = texture_load_queue[i].result;
/* it's a "loner texture," it doesn't fit in an atlas so it's not in one */
if (response.data->w >= (int)ctx.texture_atlas_size || response.data->h >= (int)ctx.texture_atlas_size) {
if (ctx.game.debug) {
if (response.data->w > 2048 || response.data->h > 2048)
log_warn("Unportable texture dimensions for %s, use 2048x2048 at max", cache->hash[texture_load_queue[i].index].key);
if (!is_power_of_two(response.data->w) || !is_power_of_two(response.data->h))
log_warn("Unportable texture dimensions for %s, should be powers of 2", cache->hash[texture_load_queue[i].index].key);
}
response.loner_texture = create_gpu_texture(TEXTURE_FILTER_NEAREAST, true);
upload_texture_from_surface(response.loner_texture, response.data);
response.srcrect = (Rect) { .w = (float)response.data->w, .h = (float)response.data->h };
} else {
/* will be fully populated as the atlas updates */
response.atlas_index = cache->atlas_index;
}
cache->hash[texture_load_queue[i].index].value = response;
}
arrsetlen(texture_load_queue, 0);
/* this function makes a lot more sense if you read stb_rect_pack.h */
stbrp_context pack_ctx; /* target info */
stbrp_init_target(&pack_ctx,

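The packing pass that follows (and the create_rects_from_cache() hunk above) is easier to follow next to a bare stb_rect_pack cycle. A standalone sketch with illustrative sizes, not the engine's values; the stb_rect_pack implementation is assumed to be compiled elsewhere:

#include "stb_rect_pack.h"

static void pack_example(void) {
    enum { ATLAS_SIZE = 1024, RECT_COUNT = 2 };
    static stbrp_node nodes[ATLAS_SIZE]; /* stb_rect_pack wants at least `width` nodes */
    stbrp_context pack_ctx;
    stbrp_init_target(&pack_ctx, ATLAS_SIZE, ATLAS_SIZE, nodes, ATLAS_SIZE);

    stbrp_rect rects[RECT_COUNT] = {
        { .id = 0, .w = 64,  .h = 64  },
        { .id = 1, .w = 256, .h = 128 },
    };
    int const all_packed = stbrp_pack_rects(&pack_ctx, rects, RECT_COUNT);

    for (int i = 0; i < RECT_COUNT; ++i) {
        if (rects[i].was_packed) {
            /* rects[i].x / rects[i].y is where this surface gets blitted into the atlas */
        }
    }

    if (!all_packed) {
        /* not everything fit; the cache would need another atlas (cf. add_new_atlas()) */
    }
}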
View File

@ -46,7 +46,7 @@ typedef struct TextureCache {
bool is_dirty; /* current atlas needs to be recreated */
} TextureCache;
/* type safe structure for persistent texture handles */
/* type-safe structure for frame-persistent texture handles */
typedef struct TextureKey { uint16_t id; } TextureKey;
/* tests whether given key structure corresponds to any texture */
@ -56,17 +56,12 @@ typedef struct TextureKey { uint16_t id; } TextureKey;
void textures_cache_init(struct TextureCache *cache, SDL_Window *window);
void textures_cache_deinit(struct TextureCache *cache);
/* loads an image if it isn't in the cache, otherwise a no-op. */
/* can be called from anywhere at any time after init, useful if you want to */
/* preload textures you know will definitely be used */
// void textures_load(struct texture_cache *cache, const char *path);
/* repacks the current texture atlas based on the texture cache if needed */
/* any previously returned srcrect results are invalidated after that */
/* call it every time before rendering */
void textures_update_atlas(TextureCache *cache);
/* returns a persistent handle to some texture in cache, loading it if needed */
/* returns a frame-persistent handle to some texture in the cache, loading it if needed */
/* check the result with m_texture_key_is_valid() */
TextureKey textures_get_key(TextureCache *cache, const char *path);
@ -96,4 +91,7 @@ void textures_reset_state(void);
/* warn: surface->pixels must be freed along side the surface itself */
SDL_Surface *textures_load_surface(const char *path);
/* note: will only take effect after `textures_update_atlas` */
bool textures_load_workers_thread(void);
#endif
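Taken together, the comments above define a per-frame contract: keys are only trusted for the current frame, and textures_update_atlas() must run before rendering so queued worker loads get collected. A hypothetical call site (the path and surrounding code are illustrative):

static void frame_example(void) {
    /* queues a worker load on a cache miss and returns a handle immediately */
    TextureKey const key = textures_get_key(&ctx.texture_cache, "/assets/player.png");
    if (!m_texture_key_is_valid(key))
        return;

    /* waits for outstanding worker loads, copies results into the cache, repacks if dirty */
    textures_update_atlas(&ctx.texture_cache);

    /* ...draw using `key`; don't store it across frames, it is only frame-persistent... */
}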

View File

@ -27,8 +27,12 @@ static int worker_thread(void *udata) {
if (SDL_SemWaitTimeout(workers_job_semaphore, 100) == SDL_MUTEX_TIMEDOUT)
continue;
/* process models, which will trigger texture loads */
if (model_load_workers_thread())
continue;
if (textures_load_workers_thread())
continue;
}
/* let the main thread collect it */
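For context, the loop above is the consumer half of the pool: it sleeps on workers_job_semaphore (with a 100 ms timeout, presumably so shutdown can be noticed) and tries each job kind in turn. A hedged sketch of how such a pool is typically brought up with SDL; only workers_job_semaphore and worker_thread() appear in this diff, the other names are assumptions:

#define WORKER_COUNT 4

static SDL_Thread *worker_pool[WORKER_COUNT];

static void workers_init(void) {
    workers_job_semaphore = SDL_CreateSemaphore(0); /* starts at zero: no jobs queued yet */
    for (int i = 0; i < WORKER_COUNT; ++i)
        worker_pool[i] = SDL_CreateThread(worker_thread, "twn_worker", NULL);
}

/* producers (draw_model(), textures_load()) push onto their queue under its mutex and then
   SDL_SemPost(workers_job_semaphore); each post wakes at most one sleeping worker */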