finally properly implemented depth optimization for 2d

2024-10-14 15:31:56 +03:00
parent 3535a185df
commit 19bf88d44e
4 changed files with 127 additions and 13 deletions
--- a/apps/demos/bunnymark/game.c
+++ b/apps/demos/bunnymark/game.c
@@ -30,7 +30,9 @@ static void handle_input(void)
                state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].color =
-                    (Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255};
+                    (Color){(uint8_t)(state->bunniesCount % 190 + 50),
                            (uint8_t)((state->bunniesCount + 120) % 160 + 80),
                            (uint8_t)((state->bunniesCount + 65) % 140 + 100), 255};
                state->bunniesCount++;
            }
        }
@@ -46,7 +48,9 @@ static void handle_input(void)
                state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].color =
-                    (Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255};
+                   (Color){(uint8_t)(state->bunniesCount % 190 + 50),
                            (uint8_t)((state->bunniesCount + 120) % 160 + 80),
                            (uint8_t)((state->bunniesCount + 65) % 140 + 100), 255};
                state->bunniesCount++;
            }
        }
--- a/apps/demos/bunnymark/state.h
+++ b/apps/demos/bunnymark/state.h
@@ -3,7 +3,7 @@
 #include "twn_game_api.h"
-#define MAX_BUNNIES 100000 // 100K bunnies limit
+#define MAX_BUNNIES 500000 // 500K bunnies limit
 #define BUNNY_W 26
 #define BUNNY_H 37
 #define SPRITE_SCALE 1
--- a/src/rendering/twn_draw.c
+++ b/src/rendering/twn_draw.c
@@ -181,42 +181,152 @@ static void render_2d(void) {
    const size_t render_queue_len = arrlenu(ctx.render_queue_2d);
-    size_t batch_count = 0;
+    struct Render2DInvocation {
        Primitive2D const *primitive;
        double layer;
        union {
            struct QuadBatch quad_batch;
        };
    };
    /* first, collect all invocations, while merging into batches where applicable */
    /* we separate into opaque and transparent ones, as it presents optimization opportunities */
    struct Render2DInvocation *opaque_invocations = NULL;
    struct Render2DInvocation *ghostly_invocations = NULL;
    arrsetcap(opaque_invocations, render_queue_len);
    arrsetcap(ghostly_invocations, render_queue_len);
    for (size_t i = 0; i < render_queue_len; ++i) {
        const Primitive2D *current = &ctx.render_queue_2d[i];
        // TODO: https://gamedev.stackexchange.com/questions/101136/using-full-resolution-of-depth-buffer-for-2d-rendering
        double const layer = ((double)((render_queue_len + 1) - i) / (double)(render_queue_len + 1)) * 0.75;
        switch (current->type) {
            case PRIMITIVE_2D_SPRITE: {
                const struct QuadBatch batch =
                    collect_sprite_batch(current, render_queue_len - i);
-                /* TODO: what's even the point? just use OR_EQUAL comparison */
+                struct Render2DInvocation const invocation = {
-                set_depth_range((double)batch_count / UINT16_MAX, 1.0);
+                    .primitive = current,
-                render_sprite_batch(current, batch);
+                    .quad_batch = batch,
                    .layer = layer,
                };
-                i += batch.size - 1; ++batch_count;
+                if (batch.mode == TEXTURE_MODE_GHOSTLY)
                    arrput(ghostly_invocations, invocation);
                else
                    arrput(opaque_invocations, invocation);
                i += batch.size - 1;
                break;
            }
            case PRIMITIVE_2D_RECT: {
                const struct QuadBatch batch =
                    collect_rect_batch(current, render_queue_len - i);
-                render_rect_batch(current, batch);
+                struct Render2DInvocation const invocation = {
                    .primitive = current,
                    .quad_batch = batch,
                    .layer = layer,
                };
-                i += batch.size - 1; ++batch_count;
+                if (batch.mode == TEXTURE_MODE_GHOSTLY)
                    arrput(ghostly_invocations, invocation);
                else
                    arrput(opaque_invocations, invocation);
                i += batch.size - 1;
                break;
            }
            case PRIMITIVE_2D_CIRCLE: {
                struct Render2DInvocation const invocation = {
                    .primitive = current,
                    .layer = layer,
                };
                if (current->circle.color.a != 255)
                    arrput(ghostly_invocations, invocation);
                else
                    arrput(opaque_invocations, invocation);
                break;
            }
            case PRIMITIVE_2D_TEXT: {
                struct Render2DInvocation const invocation = {
                    .primitive = current,
                    .layer = layer,
                };
                arrput(ghostly_invocations, invocation);
                break;
            }
            default:
                SDL_assert(false);
        }
    }
    /* first issue all opaque primitives, front-to-back */
    for (size_t i = 0; i < arrlenu(opaque_invocations); ++i) {
        struct Render2DInvocation const invocation = opaque_invocations[arrlenu(opaque_invocations) - 1 - i];
        /* idea here is to set constant z write that moves further and further along */
        /* with that every batch can early z reject against the previous */
        /* additionally, it will also apply for future transparent passes, sandwiching in-between */
        set_depth_range(invocation.layer, 1.0);
        switch (invocation.primitive->type) {
            case PRIMITIVE_2D_SPRITE: {
                render_sprite_batch(invocation.primitive, invocation.quad_batch);
                break;
            }
            case PRIMITIVE_2D_RECT: {
                render_rect_batch(invocation.primitive, invocation.quad_batch);
                break;
            }
            /* TODO: circle batching */
            case PRIMITIVE_2D_CIRCLE:
-                render_circle(&current->circle);
+                render_circle(&invocation.primitive->circle);
                break;
            case PRIMITIVE_2D_TEXT:
-                render_text(&current->text);
+            default:
                SDL_assert(false);
        }
    }
    /* then issue all transparent primitives, back-to-front */
    for (size_t i = 0; i < arrlenu(ghostly_invocations); ++i) {
        struct Render2DInvocation const invocation = ghostly_invocations[i];
        /* now we use it not for writing layers, but inferring ordering */
        set_depth_range(invocation.layer, 1.0);
        switch (invocation.primitive->type) {
            case PRIMITIVE_2D_SPRITE: {
                render_sprite_batch(invocation.primitive, invocation.quad_batch);
                break;
            }
            case PRIMITIVE_2D_RECT: {
                render_rect_batch(invocation.primitive, invocation.quad_batch);
                break;
            }
            /* TODO: circle batching */
            case PRIMITIVE_2D_CIRCLE:
                render_circle(&invocation.primitive->circle);
                break;
            case PRIMITIVE_2D_TEXT:
                render_text(&invocation.primitive->text);
                break;
            default:
                SDL_assert(false);
        }
    }
    arrfree(opaque_invocations);
    arrfree(ghostly_invocations);
 }
--- a/src/rendering/twn_gl_15_rendering.c
+++ b/src/rendering/twn_gl_15_rendering.c
@@ -219,7 +219,7 @@ void use_texture_mode(TextureMode mode) {
        /* seethrough */
        glNewList(lists + 1, GL_COMPILE); {
            glDisable(GL_BLEND);
-            glDepthFunc(GL_LEQUAL);
+            glDepthFunc(GL_LESS);
            glDepthMask(GL_TRUE);
            glEnable(GL_ALPHA_TEST);
            glAlphaFunc(GL_EQUAL, 1.0f);