finally properly implemented depth optimization for 2d

This commit is contained in:
veclav talica 2024-10-14 15:31:56 +03:00
parent 3535a185df
commit 19bf88d44e
4 changed files with 127 additions and 13 deletions

View File

@ -30,7 +30,9 @@ static void handle_input(void)
state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f;
state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f;
state->bunnies[state->bunniesCount].color = state->bunnies[state->bunniesCount].color =
(Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255}; (Color){(uint8_t)(state->bunniesCount % 190 + 50),
(uint8_t)((state->bunniesCount + 120) % 160 + 80),
(uint8_t)((state->bunniesCount + 65) % 140 + 100), 255};
state->bunniesCount++; state->bunniesCount++;
} }
} }
@ -46,7 +48,9 @@ static void handle_input(void)
state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].speed.x = (float)(rand() % 500 - 250) / 60.0f;
state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f; state->bunnies[state->bunniesCount].speed.y = (float)(rand() % 500 - 250) / 60.0f;
state->bunnies[state->bunniesCount].color = state->bunnies[state->bunniesCount].color =
(Color){(uint8_t)(rand() % 190 + 50), (uint8_t)(rand() % 160 + 80), (uint8_t)(rand() % 140 + 100), 255}; (Color){(uint8_t)(state->bunniesCount % 190 + 50),
(uint8_t)((state->bunniesCount + 120) % 160 + 80),
(uint8_t)((state->bunniesCount + 65) % 140 + 100), 255};
state->bunniesCount++; state->bunniesCount++;
} }
} }

View File

@ -3,7 +3,7 @@
#include "twn_game_api.h" #include "twn_game_api.h"
#define MAX_BUNNIES 100000 // 100K bunnies limit #define MAX_BUNNIES 500000 // 500K bunnies limit
#define BUNNY_W 26 #define BUNNY_W 26
#define BUNNY_H 37 #define BUNNY_H 37
#define SPRITE_SCALE 1 #define SPRITE_SCALE 1

View File

@ -181,42 +181,152 @@ static void render_2d(void) {
const size_t render_queue_len = arrlenu(ctx.render_queue_2d); const size_t render_queue_len = arrlenu(ctx.render_queue_2d);
size_t batch_count = 0; struct Render2DInvocation {
Primitive2D const *primitive;
double layer;
union {
struct QuadBatch quad_batch;
};
};
/* first, collect all invocations, while merging into batches where applicable */
/* we separate into opaque and transparent ones, as it presents optimization opportunities */
struct Render2DInvocation *opaque_invocations = NULL;
struct Render2DInvocation *ghostly_invocations = NULL;
arrsetcap(opaque_invocations, render_queue_len);
arrsetcap(ghostly_invocations, render_queue_len);
for (size_t i = 0; i < render_queue_len; ++i) { for (size_t i = 0; i < render_queue_len; ++i) {
const Primitive2D *current = &ctx.render_queue_2d[i]; const Primitive2D *current = &ctx.render_queue_2d[i];
// TODO: https://gamedev.stackexchange.com/questions/101136/using-full-resolution-of-depth-buffer-for-2d-rendering
double const layer = ((double)((render_queue_len + 1) - i) / (double)(render_queue_len + 1)) * 0.75;
switch (current->type) { switch (current->type) {
case PRIMITIVE_2D_SPRITE: { case PRIMITIVE_2D_SPRITE: {
const struct QuadBatch batch = const struct QuadBatch batch =
collect_sprite_batch(current, render_queue_len - i); collect_sprite_batch(current, render_queue_len - i);
/* TODO: what's even the point? just use OR_EQUAL comparison */ struct Render2DInvocation const invocation = {
set_depth_range((double)batch_count / UINT16_MAX, 1.0); .primitive = current,
render_sprite_batch(current, batch); .quad_batch = batch,
.layer = layer,
};
i += batch.size - 1; ++batch_count; if (batch.mode == TEXTURE_MODE_GHOSTLY)
arrput(ghostly_invocations, invocation);
else
arrput(opaque_invocations, invocation);
i += batch.size - 1;
break; break;
} }
case PRIMITIVE_2D_RECT: { case PRIMITIVE_2D_RECT: {
const struct QuadBatch batch = const struct QuadBatch batch =
collect_rect_batch(current, render_queue_len - i); collect_rect_batch(current, render_queue_len - i);
render_rect_batch(current, batch); struct Render2DInvocation const invocation = {
.primitive = current,
.quad_batch = batch,
.layer = layer,
};
i += batch.size - 1; ++batch_count; if (batch.mode == TEXTURE_MODE_GHOSTLY)
arrput(ghostly_invocations, invocation);
else
arrput(opaque_invocations, invocation);
i += batch.size - 1;
break; break;
} }
case PRIMITIVE_2D_CIRCLE: {
struct Render2DInvocation const invocation = {
.primitive = current,
.layer = layer,
};
if (current->circle.color.a != 255)
arrput(ghostly_invocations, invocation);
else
arrput(opaque_invocations, invocation);
break;
}
case PRIMITIVE_2D_TEXT: {
struct Render2DInvocation const invocation = {
.primitive = current,
.layer = layer,
};
arrput(ghostly_invocations, invocation);
break;
}
default:
SDL_assert(false);
}
}
/* first issue all opaque primitives, front-to-back */
for (size_t i = 0; i < arrlenu(opaque_invocations); ++i) {
struct Render2DInvocation const invocation = opaque_invocations[arrlenu(opaque_invocations) - 1 - i];
/* idea here is to set constant z write that moves further and further along */
/* with that every batch can early z reject against the previous */
/* additionally, it will also apply for future transparent passes, sandwiching in-between */
set_depth_range(invocation.layer, 1.0);
switch (invocation.primitive->type) {
case PRIMITIVE_2D_SPRITE: {
render_sprite_batch(invocation.primitive, invocation.quad_batch);
break;
}
case PRIMITIVE_2D_RECT: {
render_rect_batch(invocation.primitive, invocation.quad_batch);
break;
}
/* TODO: circle batching */
case PRIMITIVE_2D_CIRCLE: case PRIMITIVE_2D_CIRCLE:
render_circle(&current->circle); render_circle(&invocation.primitive->circle);
break; break;
case PRIMITIVE_2D_TEXT: case PRIMITIVE_2D_TEXT:
render_text(&current->text); default:
SDL_assert(false);
}
}
/* then issue all transparent primitives, back-to-front */
for (size_t i = 0; i < arrlenu(ghostly_invocations); ++i) {
struct Render2DInvocation const invocation = ghostly_invocations[i];
/* now we use it not for writing layers, but inferring ordering */
set_depth_range(invocation.layer, 1.0);
switch (invocation.primitive->type) {
case PRIMITIVE_2D_SPRITE: {
render_sprite_batch(invocation.primitive, invocation.quad_batch);
break;
}
case PRIMITIVE_2D_RECT: {
render_rect_batch(invocation.primitive, invocation.quad_batch);
break;
}
/* TODO: circle batching */
case PRIMITIVE_2D_CIRCLE:
render_circle(&invocation.primitive->circle);
break;
case PRIMITIVE_2D_TEXT:
render_text(&invocation.primitive->text);
break; break;
default: default:
SDL_assert(false); SDL_assert(false);
} }
} }
arrfree(opaque_invocations);
arrfree(ghostly_invocations);
} }

View File

@ -219,7 +219,7 @@ void use_texture_mode(TextureMode mode) {
/* seethrough */ /* seethrough */
glNewList(lists + 1, GL_COMPILE); { glNewList(lists + 1, GL_COMPILE); {
glDisable(GL_BLEND); glDisable(GL_BLEND);
glDepthFunc(GL_LEQUAL); glDepthFunc(GL_LESS);
glDepthMask(GL_TRUE); glDepthMask(GL_TRUE);
glEnable(GL_ALPHA_TEST); glEnable(GL_ALPHA_TEST);
glAlphaFunc(GL_EQUAL, 1.0f); glAlphaFunc(GL_EQUAL, 1.0f);