finally properly implemented depth optimization for 2d

2024-10-14 15:31:56 +03:00 · 2024-10-14 15:31:56 +03:00 · 19bf88d44e
commit 19bf88d44e
parent 3535a185df
4 changed files with 127 additions and 13 deletions
--- a/apps/demos/bunnymark/game.c
+++ b/apps/demos/bunnymark/game.c
@ -30,7 +30,9 @@ static void handle_input(void)
                state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].color =
-                    (Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255};
+                    (Color){(uint8_t)(state->bunniesCount % 190 + 50),
+                            (uint8_t)((state->bunniesCount + 120) % 160 + 80),
+                            (uint8_t)((state->bunniesCount + 65) % 140 + 100), 255};
                state->bunniesCount++;
            }
        }
@ -46,7 +48,9 @@ static void handle_input(void)
                state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f;
                state->bunnies[state->bunniesCount].color =
-                    (Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255};
+                   (Color){(uint8_t)(state->bunniesCount % 190 + 50),
+                            (uint8_t)((state->bunniesCount + 120) % 160 + 80),
+                            (uint8_t)((state->bunniesCount + 65) % 140 + 100), 255};
                state->bunniesCount++;
            }
        }
--- a/apps/demos/bunnymark/state.h
+++ b/apps/demos/bunnymark/state.h
@ -3,7 +3,7 @@

 #include "twn_game_api.h"

-#define MAX_BUNNIES 100000 // 100K bunnies limit
+#define MAX_BUNNIES 500000 // 500K bunnies limit
 #define BUNNY_W 26
 #define BUNNY_H 37
 #define SPRITE_SCALE 1
--- a/src/rendering/twn_draw.c
+++ b/src/rendering/twn_draw.c
@ -181,42 +181,152 @@ static void render_2d(void) {

    const size_t render_queue_len = arrlenu(ctx.render_queue_2d);

-    size_t batch_count = 0;
+    struct Render2DInvocation {
+        Primitive2D const *primitive;
+        double layer;
+        union {
+            struct QuadBatch quad_batch;
+        };
+    };
+
+    /* first, collect all invocations, while merging into batches where applicable */
+    /* we separate into opaque and transparent ones, as it presents optimization opportunities */
+    struct Render2DInvocation *opaque_invocations = NULL;
+    struct Render2DInvocation *ghostly_invocations = NULL;
+
+    arrsetcap(opaque_invocations, render_queue_len);
+    arrsetcap(ghostly_invocations, render_queue_len);

    for (size_t i = 0; i < render_queue_len; ++i) {
        const Primitive2D *current = &ctx.render_queue_2d[i];

+        // TODO: https://gamedev.stackexchange.com/questions/101136/using-full-resolution-of-depth-buffer-for-2d-rendering
+        double const layer = ((double)((render_queue_len + 1) - i) / (double)(render_queue_len + 1)) * 0.75;
+
        switch (current->type) {
            case PRIMITIVE_2D_SPRITE: {
                const struct QuadBatch batch =
                    collect_sprite_batch(current, render_queue_len - i);

-                /* TODO: what's even the point? just use OR_EQUAL comparison */
-                set_depth_range((double)batch_count / UINT16_MAX, 1.0);
-                render_sprite_batch(current, batch);
+                struct Render2DInvocation const invocation = {
+                    .primitive = current,
+                    .quad_batch = batch,
+                    .layer = layer,
+                };

-                i += batch.size - 1; ++batch_count;
+                if (batch.mode == TEXTURE_MODE_GHOSTLY)
+                    arrput(ghostly_invocations, invocation);
+                else
+                    arrput(opaque_invocations, invocation);
+
+                i += batch.size - 1;
                break;
            }
+
            case PRIMITIVE_2D_RECT: {
                const struct QuadBatch batch =
                    collect_rect_batch(current, render_queue_len - i);

-                render_rect_batch(current, batch);
+                struct Render2DInvocation const invocation = {
+                    .primitive = current,
+                    .quad_batch = batch,
+                    .layer = layer,
+                };

-                i += batch.size - 1; ++batch_count;
+                if (batch.mode == TEXTURE_MODE_GHOSTLY)
+                    arrput(ghostly_invocations, invocation);
+                else
+                    arrput(opaque_invocations, invocation);
+
+                i += batch.size - 1;
                break;
            }
+
+            case PRIMITIVE_2D_CIRCLE: {
+                struct Render2DInvocation const invocation = {
+                    .primitive = current,
+                    .layer = layer,
+                };
+
+                if (current->circle.color.a != 255)
+                    arrput(ghostly_invocations, invocation);
+                else
+                    arrput(opaque_invocations, invocation);
+                break;
+            }
+
+            case PRIMITIVE_2D_TEXT: {
+                struct Render2DInvocation const invocation = {
+                    .primitive = current,
+                    .layer = layer,
+                };
+                arrput(ghostly_invocations, invocation);
+                break;
+            }
+
+            default:
+                SDL_assert(false);
+        }
+    }
+
+    /* first issue all opaque primitives, front-to-back */
+    for (size_t i = 0; i < arrlenu(opaque_invocations); ++i) {
+        struct Render2DInvocation const invocation = opaque_invocations[arrlenu(opaque_invocations) - 1 - i];
+
+        /* idea here is to set constant z write that moves further and further along */
+        /* with that every batch can early z reject against the previous */
+        /* additionally, it will also apply for future transparent passes, sandwiching in-between */
+        set_depth_range(invocation.layer, 1.0);
+
+        switch (invocation.primitive->type) {
+            case PRIMITIVE_2D_SPRITE: {
+                render_sprite_batch(invocation.primitive, invocation.quad_batch);
+                break;
+            }
+            case PRIMITIVE_2D_RECT: {
+                render_rect_batch(invocation.primitive, invocation.quad_batch);
+                break;
+            }
+            /* TODO: circle batching */
            case PRIMITIVE_2D_CIRCLE:
-                render_circle(&current->circle);
+                render_circle(&invocation.primitive->circle);
                break;
            case PRIMITIVE_2D_TEXT:
-                render_text(&current->text);
+            default:
+                SDL_assert(false);
+        }
+    }
+
+    /* then issue all transparent primitives, back-to-front */
+    for (size_t i = 0; i < arrlenu(ghostly_invocations); ++i) {
+        struct Render2DInvocation const invocation = ghostly_invocations[i];
+
+        /* now we use it not for writing layers, but inferring ordering */
+        set_depth_range(invocation.layer, 1.0);
+
+        switch (invocation.primitive->type) {
+            case PRIMITIVE_2D_SPRITE: {
+                render_sprite_batch(invocation.primitive, invocation.quad_batch);
+                break;
+            }
+            case PRIMITIVE_2D_RECT: {
+                render_rect_batch(invocation.primitive, invocation.quad_batch);
+                break;
+            }
+            /* TODO: circle batching */
+            case PRIMITIVE_2D_CIRCLE:
+                render_circle(&invocation.primitive->circle);
+                break;
+            case PRIMITIVE_2D_TEXT:
+                render_text(&invocation.primitive->text);
                break;
            default:
                SDL_assert(false);
        }
    }
+
+    arrfree(opaque_invocations);
+    arrfree(ghostly_invocations);
 }


--- a/src/rendering/twn_gl_15_rendering.c
+++ b/src/rendering/twn_gl_15_rendering.c
@ -219,7 +219,7 @@ void use_texture_mode(TextureMode mode) {
        /* seethrough */
        glNewList(lists + 1, GL_COMPILE); {
            glDisable(GL_BLEND);
-            glDepthFunc(GL_LEQUAL);
+            glDepthFunc(GL_LESS);
            glDepthMask(GL_TRUE);
            glEnable(GL_ALPHA_TEST);
            glAlphaFunc(GL_EQUAL, 1.0f);