From 19bf88d44e7595a92237ebc246b90103f09219b7 Mon Sep 17 00:00:00 2001 From: veclavtalica Date: Mon, 14 Oct 2024 15:31:56 +0300 Subject: [PATCH] finally properly implemented depth optimization for 2d --- apps/demos/bunnymark/game.c | 8 +- apps/demos/bunnymark/state.h | 2 +- src/rendering/twn_draw.c | 128 ++++++++++++++++++++++++++-- src/rendering/twn_gl_15_rendering.c | 2 +- 4 files changed, 127 insertions(+), 13 deletions(-) diff --git a/apps/demos/bunnymark/game.c b/apps/demos/bunnymark/game.c index 9e6094a..b9de4fb 100644 --- a/apps/demos/bunnymark/game.c +++ b/apps/demos/bunnymark/game.c @@ -30,7 +30,9 @@ static void handle_input(void) state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].color = - (Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255}; + (Color){(uint8_t)(state->bunniesCount % 190 + 50), + (uint8_t)((state->bunniesCount + 120) % 160 + 80), + (uint8_t)((state->bunniesCount + 65) % 140 + 100), 255}; state->bunniesCount++; } } @@ -46,7 +48,9 @@ static void handle_input(void) state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].color = - (Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255}; + (Color){(uint8_t)(state->bunniesCount % 190 + 50), + (uint8_t)((state->bunniesCount + 120) % 160 + 80), + (uint8_t)((state->bunniesCount + 65) % 140 + 100), 255}; state->bunniesCount++; } } diff --git a/apps/demos/bunnymark/state.h b/apps/demos/bunnymark/state.h index 436bbb2..bc5133a 100644 --- a/apps/demos/bunnymark/state.h +++ b/apps/demos/bunnymark/state.h @@ -3,7 +3,7 @@ #include "twn_game_api.h" -#define MAX_BUNNIES 100000 // 100K bunnies limit +#define 
MAX_BUNNIES 500000 // 500K bunnies limit #define BUNNY_W 26 #define BUNNY_H 37 #define SPRITE_SCALE 1 diff --git a/src/rendering/twn_draw.c b/src/rendering/twn_draw.c index 1732760..bf0ff69 100644 --- a/src/rendering/twn_draw.c +++ b/src/rendering/twn_draw.c @@ -181,42 +181,152 @@ static void render_2d(void) { const size_t render_queue_len = arrlenu(ctx.render_queue_2d); - size_t batch_count = 0; + struct Render2DInvocation { + Primitive2D const *primitive; + double layer; + union { + struct QuadBatch quad_batch; + }; + }; + + /* first, collect all invocations, while merging into batches where applicable */ + /* we separate into opaque and transparent ones, as it presents optimization opportunities */ + struct Render2DInvocation *opaque_invocations = NULL; + struct Render2DInvocation *ghostly_invocations = NULL; + + arrsetcap(opaque_invocations, render_queue_len); + arrsetcap(ghostly_invocations, render_queue_len); for (size_t i = 0; i < render_queue_len; ++i) { const Primitive2D *current = &ctx.render_queue_2d[i]; + // TODO: https://gamedev.stackexchange.com/questions/101136/using-full-resolution-of-depth-buffer-for-2d-rendering + double const layer = ((double)((render_queue_len + 1) - i) / (double)(render_queue_len + 1)) * 0.75; + switch (current->type) { case PRIMITIVE_2D_SPRITE: { const struct QuadBatch batch = collect_sprite_batch(current, render_queue_len - i); - /* TODO: what's even the point? 
just use OR_EQUAL comparison */ - set_depth_range((double)batch_count / UINT16_MAX, 1.0); - render_sprite_batch(current, batch); + struct Render2DInvocation const invocation = { + .primitive = current, + .quad_batch = batch, + .layer = layer, + }; - i += batch.size - 1; ++batch_count; + if (batch.mode == TEXTURE_MODE_GHOSTLY) + arrput(ghostly_invocations, invocation); + else + arrput(opaque_invocations, invocation); + + i += batch.size - 1; break; } + case PRIMITIVE_2D_RECT: { const struct QuadBatch batch = collect_rect_batch(current, render_queue_len - i); - render_rect_batch(current, batch); + struct Render2DInvocation const invocation = { + .primitive = current, + .quad_batch = batch, + .layer = layer, + }; - i += batch.size - 1; ++batch_count; + if (batch.mode == TEXTURE_MODE_GHOSTLY) + arrput(ghostly_invocations, invocation); + else + arrput(opaque_invocations, invocation); + + i += batch.size - 1; break; } + + case PRIMITIVE_2D_CIRCLE: { + struct Render2DInvocation const invocation = { + .primitive = current, + .layer = layer, + }; + + if (current->circle.color.a != 255) + arrput(ghostly_invocations, invocation); + else + arrput(opaque_invocations, invocation); + break; + } + + case PRIMITIVE_2D_TEXT: { + struct Render2DInvocation const invocation = { + .primitive = current, + .layer = layer, + }; + arrput(ghostly_invocations, invocation); + break; + } + + default: + SDL_assert(false); + } + } + + /* first issue all opaque primitives, front-to-back */ + for (size_t i = 0; i < arrlenu(opaque_invocations); ++i) { + struct Render2DInvocation const invocation = opaque_invocations[arrlenu(opaque_invocations) - 1 - i]; + + /* idea here is to set constant z write that moves further and further along */ + /* with that every batch can early z reject against the previous */ + /* additionally, it will also apply for future transparent passes, sandwiching in-between */ + set_depth_range(invocation.layer, 1.0); + + switch (invocation.primitive->type) { + case 
PRIMITIVE_2D_SPRITE: { + render_sprite_batch(invocation.primitive, invocation.quad_batch); + break; + } + case PRIMITIVE_2D_RECT: { + render_rect_batch(invocation.primitive, invocation.quad_batch); + break; + } + /* TODO: circle batching */ case PRIMITIVE_2D_CIRCLE: - render_circle(&current->circle); + render_circle(&invocation.primitive->circle); break; case PRIMITIVE_2D_TEXT: - render_text(&current->text); + default: + SDL_assert(false); + } + } + + /* then issue all transparent primitives, back-to-front */ + for (size_t i = 0; i < arrlenu(ghostly_invocations); ++i) { + struct Render2DInvocation const invocation = ghostly_invocations[i]; + + /* now we use it not for writing layers, but inferring ordering */ + set_depth_range(invocation.layer, 1.0); + + switch (invocation.primitive->type) { + case PRIMITIVE_2D_SPRITE: { + render_sprite_batch(invocation.primitive, invocation.quad_batch); + break; + } + case PRIMITIVE_2D_RECT: { + render_rect_batch(invocation.primitive, invocation.quad_batch); + break; + } + /* TODO: circle batching */ + case PRIMITIVE_2D_CIRCLE: + render_circle(&invocation.primitive->circle); + break; + case PRIMITIVE_2D_TEXT: + render_text(&invocation.primitive->text); break; default: SDL_assert(false); } } + + arrfree(opaque_invocations); + arrfree(ghostly_invocations); } diff --git a/src/rendering/twn_gl_15_rendering.c b/src/rendering/twn_gl_15_rendering.c index a2cf4cb..efb82dd 100644 --- a/src/rendering/twn_gl_15_rendering.c +++ b/src/rendering/twn_gl_15_rendering.c @@ -219,7 +219,7 @@ void use_texture_mode(TextureMode mode) { /* seethrough */ glNewList(lists + 1, GL_COMPILE); { glDisable(GL_BLEND); - glDepthFunc(GL_LEQUAL); + glDepthFunc(GL_LESS); glDepthMask(GL_TRUE); glEnable(GL_ALPHA_TEST); glAlphaFunc(GL_EQUAL, 1.0f);