From ea4d12212cc2d128119b725b67a51664c06663c7 Mon Sep 17 00:00:00 2001
From: veclavtalica
Date: Sun, 28 Jul 2024 14:39:23 +0300
Subject: [PATCH] rendering.c: fast cos from sin calculation, with lossy fast_sqrt()

---
 src/rendering/sprites.h | 178 ++++++++++++++++++++--------------------
 src/system/linux/elf.c  |  82 +++++++++---------
 src/util.h              |  37 +++++++++
 3 files changed, 168 insertions(+), 129 deletions(-)

diff --git a/src/rendering/sprites.h b/src/rendering/sprites.h
index 0a7fb79..45cd429 100644
--- a/src/rendering/sprites.h
+++ b/src/rendering/sprites.h
@@ -5,6 +5,7 @@
 #include "../textures.h"
 #include "../rendering.h"
 #include "../context.h"
+#include "../util.h"
 #include "quad_element_buffer.h"
 
 #include
@@ -19,7 +20,7 @@
  * before anything is really rendered */
 
 /* TODO: it might make sense to infer alpha channel presence / meaningfulness for textures in atlas */
-/* so that they are rendered with no blend / batched in a way to reduce overdraw automatically */ 
+/* so that they are rendered with no blend / batched in a way to reduce overdraw automatically */
 void push_sprite(char *path, t_frect rect) {
     struct sprite_primitive sprite = {
         .rect = rect,
@@ -104,7 +105,7 @@ static void render_sprites(const struct primitive_2d primitives[],
                            const size_t len,
                            const bool reversed)
 {
-	/* single vertex array is used for every batch with NULL glBufferData() trick at the end */
+    /* single vertex array is used for every batch with NULL glBufferData() trick at the end */
     static GLuint vertex_array = 0;
     if (vertex_array == 0)
         glGenBuffers(1, &vertex_array);
@@ -128,103 +129,104 @@ static void render_sprites(const struct primitive_2d primitives[],
         const size_t cur = reversed ? len - i - 1: i;
         const struct sprite_primitive sprite = primitives[cur].sprite;
 
-		const t_rect srcrect =
-			textures_get_srcrect(&ctx.texture_cache, primitives[cur].sprite.texture_key);
+        const t_rect srcrect =
+            textures_get_srcrect(&ctx.texture_cache, primitives[cur].sprite.texture_key);
 
-		const float wr = (float)srcrect.w / (float)dims.w;
-		const float hr = (float)srcrect.h / (float)dims.h;
-		const float xr = (float)srcrect.x / (float)dims.w;
-		const float yr = (float)srcrect.y / (float)dims.h;
+        const float wr = (float)srcrect.w / (float)dims.w;
+        const float hr = (float)srcrect.h / (float)dims.h;
+        const float xr = (float)srcrect.x / (float)dims.w;
+        const float yr = (float)srcrect.y / (float)dims.h;
 
-		/* non-rotated case */
-		if (sprite.rotation == 0.0f) {
-			payload[i] = (struct sprite_primitive_payload) {
-				/* upper-left */
-				.v0 = {
-					sprite.rect.x,
-					sprite.rect.y },
-				.uv0 = {
-					xr + wr * sprite.flip_x,
-					yr + hr * sprite.flip_y, },
+        /* non-rotated case */
+        if (sprite.rotation == 0.0f) {
+            payload[i] = (struct sprite_primitive_payload) {
+                /* upper-left */
+                .v0 = {
+                    sprite.rect.x,
+                    sprite.rect.y },
+                .uv0 = {
+                    xr + wr * sprite.flip_x,
+                    yr + hr * sprite.flip_y, },
 
-				/* bottom-left */
-				.v1 = {
-					(sprite.rect.x),
-					(sprite.rect.y + sprite.rect.h) },
-				.uv1 = {
-					xr + wr * sprite.flip_x,
-					yr + hr * !sprite.flip_y, },
+                /* bottom-left */
+                .v1 = {
+                    (sprite.rect.x),
+                    (sprite.rect.y + sprite.rect.h) },
+                .uv1 = {
+                    xr + wr * sprite.flip_x,
+                    yr + hr * !sprite.flip_y, },
 
-				/* bottom-right */
-				.v2 = {
-					(sprite.rect.x + sprite.rect.w),
-					(sprite.rect.y + sprite.rect.h) },
-				.uv2 = {
-					xr + wr * !sprite.flip_x,
-					yr + hr * !sprite.flip_y, },
+                /* bottom-right */
+                .v2 = {
+                    (sprite.rect.x + sprite.rect.w),
+                    (sprite.rect.y + sprite.rect.h) },
+                .uv2 = {
+                    xr + wr * !sprite.flip_x,
+                    yr + hr * !sprite.flip_y, },
 
-				/* upper-right */
-				.v3 = {
-					(sprite.rect.x + sprite.rect.w),
-					(sprite.rect.y) },
-				.uv3 = {
-					xr + wr * !sprite.flip_x,
-					yr + hr * sprite.flip_y, },
+                /* upper-right */
+                .v3 = {
+                    (sprite.rect.x + sprite.rect.w),
+                    (sprite.rect.y) },
+                .uv3 = {
+                    xr + wr * !sprite.flip_x,
+                    yr + hr * sprite.flip_y, },
 
-				/* equal for all (flat shaded) */
-				.c0 = sprite.color,
-				.c1 = sprite.color,
-				.c2 = sprite.color,
-				.c3 = sprite.color,
-			};
-		} else {
-			/* rotated case */
-			const t_fvec2 c = frect_center(sprite.rect);
-			const t_fvec2 d = {
-				.x = (cosf(sprite.rotation + (float)M_PI_4) * sprite.rect.w) * (float)M_SQRT1_2,
-				.y = (sinf(sprite.rotation + (float)M_PI_4) * sprite.rect.h) * (float)M_SQRT1_2,
-			};
+                /* equal for all (flat shaded) */
+                .c0 = sprite.color,
+                .c1 = sprite.color,
+                .c2 = sprite.color,
+                .c3 = sprite.color,
+            };
+        } else {
+            /* rotated case */
+            const t_fvec2 c = frect_center(sprite.rect);
+            const t_fvec2 t = fast_cossine(sprite.rotation + (float)M_PI_4);
+            const t_fvec2 d = {
+                .x = t.x * sprite.rect.w * (float)M_SQRT1_2,
+                .y = t.y * sprite.rect.h * (float)M_SQRT1_2,
+            };
 
-			payload[i] = (struct sprite_primitive_payload) {
-				/* upper-left */
-				.v0 = {
-					c.x - d.x,
-					c.y - d.y },
-				.uv0 = {
-					xr + wr * sprite.flip_x,
-					yr + hr * sprite.flip_y, },
+            payload[i] = (struct sprite_primitive_payload) {
+                /* upper-left */
+                .v0 = {
+                    c.x - d.x,
+                    c.y - d.y },
+                .uv0 = {
+                    xr + wr * sprite.flip_x,
+                    yr + hr * sprite.flip_y, },
 
-				/* bottom-left */
-				.v1 = {
-					c.x - d.y,
-					c.y + d.x },
-				.uv1 = {
-					xr + wr * sprite.flip_x,
-					yr + hr * !sprite.flip_y, },
+                /* bottom-left */
+                .v1 = {
+                    c.x - d.y,
+                    c.y + d.x },
+                .uv1 = {
+                    xr + wr * sprite.flip_x,
+                    yr + hr * !sprite.flip_y, },
 
-				/* bottom-right */
-				.v2 = {
-					c.x + d.x,
-					c.y + d.y },
-				.uv2 = {
-					xr + wr * !sprite.flip_x,
-					yr + hr * !sprite.flip_y, },
+                /* bottom-right */
+                .v2 = {
+                    c.x + d.x,
+                    c.y + d.y },
+                .uv2 = {
+                    xr + wr * !sprite.flip_x,
+                    yr + hr * !sprite.flip_y, },
 
-				/* upper-right */
-				.v3 = {
-					c.x + d.y,
-					c.y - d.x },
-				.uv3 = {
-					xr + wr * !sprite.flip_x,
-					yr + hr * sprite.flip_y, },
+                /* upper-right */
+                .v3 = {
+                    c.x + d.y,
+                    c.y - d.x },
+                .uv3 = {
+                    xr + wr * !sprite.flip_x,
+                    yr + hr * sprite.flip_y, },
 
-				/* equal for all (flat shaded) */
-				.c0 = sprite.color,
-				.c1 = sprite.color,
-				.c2 = sprite.color,
-				.c3 = sprite.color,
-			};
-		}
+                /* equal for all (flat shaded) */
+                .c0 = sprite.color,
+                .c1 = sprite.color,
+                .c2 = sprite.color,
+                .c3 = sprite.color,
+            };
+        }
     }
 
     glUnmapBuffer(GL_ARRAY_BUFFER);
diff --git a/src/system/linux/elf.c b/src/system/linux/elf.c
index 7377617..9696c2f 100644
--- a/src/system/linux/elf.c
+++ b/src/system/linux/elf.c
@@ -12,57 +12,57 @@
 #include
 
 bool infer_elf_section_bounds(const char *const restrict name,
-							  const char **restrict vm_start,
-							  const char **restrict vm_end)
+                              const char **restrict vm_start,
+                              const char **restrict vm_end)
 {
-	bool result = false;
-	char buf[PATH_MAX];
-	ssize_t l = readlink("/proc/self/exe", buf, PATH_MAX);
-	if (l == -1)
-		goto ERR_CANT_READLINK;
-	buf[l] = 0; /* readlink() doesn't write a terminator */
+    bool result = false;
+    char buf[PATH_MAX];
+    ssize_t l = readlink("/proc/self/exe", buf, sizeof buf - 1);
+    if (l == -1)
+        goto ERR_CANT_READLINK;
+    buf[l] = 0; /* readlink() doesn't write a terminator */
 
-	int elf = open(buf, O_RDONLY);
-	if (elf == -1)
-		goto ERR_CANT_OPEN_SELF;
+    int elf = open(buf, O_RDONLY);
+    if (elf == -1)
+        goto ERR_CANT_OPEN_SELF;
 
-	/* elf header */
-	Elf64_Ehdr ehdr;
-	read(elf, &ehdr, sizeof ehdr);
-	if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
-		ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
-		ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
-		ehdr.e_ident[EI_MAG3] != ELFMAG3)
-		goto ERR_NOT_ELF;
+    /* elf header */
+    Elf64_Ehdr ehdr;
+    read(elf, &ehdr, sizeof ehdr);
+    if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
+        ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
+        ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
+        ehdr.e_ident[EI_MAG3] != ELFMAG3)
+        goto ERR_NOT_ELF;
 
-	/* section header string table */
-	Elf64_Shdr shstrdr;
-	lseek(elf, ehdr.e_shoff + ehdr.e_shstrndx * sizeof (Elf64_Shdr), SEEK_SET);
-	read(elf, &shstrdr, sizeof shstrdr);
-	char *sh = malloc(shstrdr.sh_size);
-	lseek(elf, shstrdr.sh_offset, SEEK_SET);
-	read(elf, sh, shstrdr.sh_size);
+    /* section header string table */
+    Elf64_Shdr shstrdr;
+    lseek(elf, ehdr.e_shoff + ehdr.e_shstrndx * sizeof (Elf64_Shdr), SEEK_SET);
+    read(elf, &shstrdr, sizeof shstrdr);
+    char *sh = malloc(shstrdr.sh_size);
+    lseek(elf, shstrdr.sh_offset, SEEK_SET);
+    read(elf, sh, shstrdr.sh_size);
 
-	/* walk sections searching for needed name */
-	lseek(elf, ehdr.e_shoff, SEEK_SET);
-	for (size_t s = 0; s < ehdr.e_shnum; ++s) {
-		Elf64_Shdr shdr;
-		read(elf, &shdr, sizeof shdr);
+    /* walk sections searching for needed name */
+    lseek(elf, ehdr.e_shoff, SEEK_SET);
+    for (size_t s = 0; s < ehdr.e_shnum; ++s) {
+        Elf64_Shdr shdr;
+        read(elf, &shdr, sizeof shdr);
 
-		if (strcmp(&sh[shdr.sh_name], name) == 0) {
-			result = true;
-			*vm_start = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr;
-			*vm_end = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr + shdr.sh_size;
-			break;
-		}
-	}
+        if (strcmp(&sh[shdr.sh_name], name) == 0) {
+            result = true;
+            *vm_start = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr;
+            *vm_end = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr + shdr.sh_size;
+            break;
+        }
+    }
 
-	free(sh);
+    free(sh);
 
 ERR_NOT_ELF:
-	close(elf);
+    close(elf);
 
 ERR_CANT_OPEN_SELF:
 ERR_CANT_READLINK:
-	return result;
+    return result;
 }
diff --git a/src/util.h b/src/util.h
index 98b2fae..10b02af 100644
--- a/src/util.h
+++ b/src/util.h
@@ -154,5 +154,42 @@ void tick_ftimer(float *value);
 
 /* returns true if value was cycled */
 bool repeat_ftimer(float *value, float at);
+/*
+ * Lossy square root estimate computed on the float's bit pattern
+ * (assumes float is IEEE-754 binary32): the exponent bias is added back
+ * and the whole pattern is halved, which roughly halves the exponent.
+ * Intended for non-negative finite input; 1.0f maps to exactly 1.0f,
+ * while 0.0f maps to a tiny positive value rather than 0.
+ * http://www.azillionmonkeys.com/qed/sqroot.html
+ */
+static inline float fast_sqrt(float x)
+{
+    union {
+        float f;
+        uint32_t u;
+    } pun = {.f = x};
+
+    pun.u += 127 << 23;
+    pun.u >>= 1;
+
+    return pun.f;
+}
+
+
+/*
+ * Returns the cosine of a in .x and the sine of a in .y, a being in radians.
+ * The sine comes from sinf(); the cosine magnitude is recovered from
+ * cos^2 = 1 - sin^2 through the lossy fast_sqrt(), so .x is approximate.
+ */
+static inline t_fvec2 fast_cossine(float a) {
+    const float s = sinf(a);
+    /* cosine is even and 2*pi periodic, so fold the angle into [0, 2*pi); */
+    /* this keeps the sign test valid for negative and multi-turn angles */
+    const float w = fabsf(fmodf(a, (float)(2.0 * M_PI)));
+    return (t_fvec2){
+        .x = fast_sqrt(1.0f - s * s) * (w >= (float)M_PI_2 && w < (float)(M_PI + M_PI_2) ? -1 : 1),
+        .y = s
+    };
+}
+
 
 #endif