rendering.c: fast cos from sin calculation, with lossy fast_sqrt()

This commit is contained in:
veclav talica 2024-07-28 14:39:23 +03:00
parent c59708d619
commit ea4d12212c
3 changed files with 154 additions and 129 deletions

View File

@ -5,6 +5,7 @@
#include "../textures.h" #include "../textures.h"
#include "../rendering.h" #include "../rendering.h"
#include "../context.h" #include "../context.h"
#include "../util.h"
#include "quad_element_buffer.h" #include "quad_element_buffer.h"
#include <stb_ds.h> #include <stb_ds.h>
@ -19,7 +20,7 @@
* before anything is really rendered * before anything is really rendered
*/ */
/* TODO: it might make sense to infer alpha channel presence / meaningfulness for textures in atlas */ /* TODO: it might make sense to infer alpha channel presence / meaningfulness for textures in atlas */
/* so that they are rendered with no blend / batched in a way to reduce overdraw automatically */ /* so that they are rendered with no blend / batched in a way to reduce overdraw automatically */
void push_sprite(char *path, t_frect rect) { void push_sprite(char *path, t_frect rect) {
struct sprite_primitive sprite = { struct sprite_primitive sprite = {
.rect = rect, .rect = rect,
@ -104,7 +105,7 @@ static void render_sprites(const struct primitive_2d primitives[],
const size_t len, const size_t len,
const bool reversed) const bool reversed)
{ {
/* single vertex array is used for every batch with NULL glBufferData() trick at the end */ /* single vertex array is used for every batch with NULL glBufferData() trick at the end */
static GLuint vertex_array = 0; static GLuint vertex_array = 0;
if (vertex_array == 0) if (vertex_array == 0)
glGenBuffers(1, &vertex_array); glGenBuffers(1, &vertex_array);
@ -128,103 +129,104 @@ static void render_sprites(const struct primitive_2d primitives[],
const size_t cur = reversed ? len - i - 1: i; const size_t cur = reversed ? len - i - 1: i;
const struct sprite_primitive sprite = primitives[cur].sprite; const struct sprite_primitive sprite = primitives[cur].sprite;
const t_rect srcrect = const t_rect srcrect =
textures_get_srcrect(&ctx.texture_cache, primitives[cur].sprite.texture_key); textures_get_srcrect(&ctx.texture_cache, primitives[cur].sprite.texture_key);
const float wr = (float)srcrect.w / (float)dims.w; const float wr = (float)srcrect.w / (float)dims.w;
const float hr = (float)srcrect.h / (float)dims.h; const float hr = (float)srcrect.h / (float)dims.h;
const float xr = (float)srcrect.x / (float)dims.w; const float xr = (float)srcrect.x / (float)dims.w;
const float yr = (float)srcrect.y / (float)dims.h; const float yr = (float)srcrect.y / (float)dims.h;
/* non-rotated case */ /* non-rotated case */
if (sprite.rotation == 0.0f) { if (sprite.rotation == 0.0f) {
payload[i] = (struct sprite_primitive_payload) { payload[i] = (struct sprite_primitive_payload) {
/* upper-left */ /* upper-left */
.v0 = { .v0 = {
sprite.rect.x, sprite.rect.x,
sprite.rect.y }, sprite.rect.y },
.uv0 = { .uv0 = {
xr + wr * sprite.flip_x, xr + wr * sprite.flip_x,
yr + hr * sprite.flip_y, }, yr + hr * sprite.flip_y, },
/* bottom-left */ /* bottom-left */
.v1 = { .v1 = {
(sprite.rect.x), (sprite.rect.x),
(sprite.rect.y + sprite.rect.h) }, (sprite.rect.y + sprite.rect.h) },
.uv1 = { .uv1 = {
xr + wr * sprite.flip_x, xr + wr * sprite.flip_x,
yr + hr * !sprite.flip_y, }, yr + hr * !sprite.flip_y, },
/* bottom-right */ /* bottom-right */
.v2 = { .v2 = {
(sprite.rect.x + sprite.rect.w), (sprite.rect.x + sprite.rect.w),
(sprite.rect.y + sprite.rect.h) }, (sprite.rect.y + sprite.rect.h) },
.uv2 = { .uv2 = {
xr + wr * !sprite.flip_x, xr + wr * !sprite.flip_x,
yr + hr * !sprite.flip_y, }, yr + hr * !sprite.flip_y, },
/* upper-right */ /* upper-right */
.v3 = { .v3 = {
(sprite.rect.x + sprite.rect.w), (sprite.rect.x + sprite.rect.w),
(sprite.rect.y) }, (sprite.rect.y) },
.uv3 = { .uv3 = {
xr + wr * !sprite.flip_x, xr + wr * !sprite.flip_x,
yr + hr * sprite.flip_y, }, yr + hr * sprite.flip_y, },
/* equal for all (flat shaded) */ /* equal for all (flat shaded) */
.c0 = sprite.color, .c0 = sprite.color,
.c1 = sprite.color, .c1 = sprite.color,
.c2 = sprite.color, .c2 = sprite.color,
.c3 = sprite.color, .c3 = sprite.color,
}; };
} else { } else {
/* rotated case */ /* rotated case */
const t_fvec2 c = frect_center(sprite.rect); const t_fvec2 c = frect_center(sprite.rect);
const t_fvec2 d = { const t_fvec2 t = fast_cossine(sprite.rotation + (float)M_PI_4);
.x = (cosf(sprite.rotation + (float)M_PI_4) * sprite.rect.w) * (float)M_SQRT1_2, const t_fvec2 d = {
.y = (sinf(sprite.rotation + (float)M_PI_4) * sprite.rect.h) * (float)M_SQRT1_2, .x = t.x * sprite.rect.w * (float)M_SQRT1_2,
}; .y = t.y * sprite.rect.h * (float)M_SQRT1_2,
};
payload[i] = (struct sprite_primitive_payload) { payload[i] = (struct sprite_primitive_payload) {
/* upper-left */ /* upper-left */
.v0 = { .v0 = {
c.x - d.x, c.x - d.x,
c.y - d.y }, c.y - d.y },
.uv0 = { .uv0 = {
xr + wr * sprite.flip_x, xr + wr * sprite.flip_x,
yr + hr * sprite.flip_y, }, yr + hr * sprite.flip_y, },
/* bottom-left */ /* bottom-left */
.v1 = { .v1 = {
c.x - d.y, c.x - d.y,
c.y + d.x }, c.y + d.x },
.uv1 = { .uv1 = {
xr + wr * sprite.flip_x, xr + wr * sprite.flip_x,
yr + hr * !sprite.flip_y, }, yr + hr * !sprite.flip_y, },
/* bottom-right */ /* bottom-right */
.v2 = { .v2 = {
c.x + d.x, c.x + d.x,
c.y + d.y }, c.y + d.y },
.uv2 = { .uv2 = {
xr + wr * !sprite.flip_x, xr + wr * !sprite.flip_x,
yr + hr * !sprite.flip_y, }, yr + hr * !sprite.flip_y, },
/* upper-right */ /* upper-right */
.v3 = { .v3 = {
c.x + d.y, c.x + d.y,
c.y - d.x }, c.y - d.x },
.uv3 = { .uv3 = {
xr + wr * !sprite.flip_x, xr + wr * !sprite.flip_x,
yr + hr * sprite.flip_y, }, yr + hr * sprite.flip_y, },
/* equal for all (flat shaded) */ /* equal for all (flat shaded) */
.c0 = sprite.color, .c0 = sprite.color,
.c1 = sprite.color, .c1 = sprite.color,
.c2 = sprite.color, .c2 = sprite.color,
.c3 = sprite.color, .c3 = sprite.color,
}; };
} }
} }
glUnmapBuffer(GL_ARRAY_BUFFER); glUnmapBuffer(GL_ARRAY_BUFFER);

View File

@ -12,57 +12,57 @@
#include <string.h> #include <string.h>
bool infer_elf_section_bounds(const char *const restrict name, bool infer_elf_section_bounds(const char *const restrict name,
const char **restrict vm_start, const char **restrict vm_start,
const char **restrict vm_end) const char **restrict vm_end)
{ {
bool result = false; bool result = false;
char buf[PATH_MAX]; char buf[PATH_MAX];
ssize_t l = readlink("/proc/self/exe", buf, PATH_MAX); ssize_t l = readlink("/proc/self/exe", buf, PATH_MAX);
if (l == -1) if (l == -1)
goto ERR_CANT_READLINK; goto ERR_CANT_READLINK;
buf[l] = 0; /* readlink() doesn't write a terminator */ buf[l] = 0; /* readlink() doesn't write a terminator */
int elf = open(buf, O_RDONLY); int elf = open(buf, O_RDONLY);
if (elf == -1) if (elf == -1)
goto ERR_CANT_OPEN_SELF; goto ERR_CANT_OPEN_SELF;
/* elf header */ /* elf header */
Elf64_Ehdr ehdr; Elf64_Ehdr ehdr;
read(elf, &ehdr, sizeof ehdr); read(elf, &ehdr, sizeof ehdr);
if (ehdr.e_ident[EI_MAG0] != ELFMAG0 || if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
ehdr.e_ident[EI_MAG1] != ELFMAG1 || ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
ehdr.e_ident[EI_MAG2] != ELFMAG2 || ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
ehdr.e_ident[EI_MAG3] != ELFMAG3) ehdr.e_ident[EI_MAG3] != ELFMAG3)
goto ERR_NOT_ELF; goto ERR_NOT_ELF;
/* section header string table */ /* section header string table */
Elf64_Shdr shstrdr; Elf64_Shdr shstrdr;
lseek(elf, ehdr.e_shoff + ehdr.e_shstrndx * sizeof (Elf64_Shdr), SEEK_SET); lseek(elf, ehdr.e_shoff + ehdr.e_shstrndx * sizeof (Elf64_Shdr), SEEK_SET);
read(elf, &shstrdr, sizeof shstrdr); read(elf, &shstrdr, sizeof shstrdr);
char *sh = malloc(shstrdr.sh_size); char *sh = malloc(shstrdr.sh_size);
lseek(elf, shstrdr.sh_offset, SEEK_SET); lseek(elf, shstrdr.sh_offset, SEEK_SET);
read(elf, sh, shstrdr.sh_size); read(elf, sh, shstrdr.sh_size);
/* walk sections searching for needed name */ /* walk sections searching for needed name */
lseek(elf, ehdr.e_shoff, SEEK_SET); lseek(elf, ehdr.e_shoff, SEEK_SET);
for (size_t s = 0; s < ehdr.e_shnum; ++s) { for (size_t s = 0; s < ehdr.e_shnum; ++s) {
Elf64_Shdr shdr; Elf64_Shdr shdr;
read(elf, &shdr, sizeof shdr); read(elf, &shdr, sizeof shdr);
if (strcmp(&sh[shdr.sh_name], name) == 0) { if (strcmp(&sh[shdr.sh_name], name) == 0) {
result = true; result = true;
*vm_start = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr; *vm_start = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr;
*vm_end = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr + shdr.sh_size; *vm_end = getauxval(AT_ENTRY) - ehdr.e_entry + (char *)shdr.sh_addr + shdr.sh_size;
break; break;
} }
} }
free(sh); free(sh);
ERR_NOT_ELF: ERR_NOT_ELF:
close(elf); close(elf);
ERR_CANT_OPEN_SELF: ERR_CANT_OPEN_SELF:
ERR_CANT_READLINK: ERR_CANT_READLINK:
return result; return result;
} }

View File

@ -154,5 +154,28 @@ void tick_ftimer(float *value);
/* returns true if value was cycled */ /* returns true if value was cycled */
bool repeat_ftimer(float *value, float at); bool repeat_ftimer(float *value, float at);
/* http://www.azillionmonkeys.com/qed/sqroot.html */
/* Cheap, lossy square root estimate that works directly on the float's bit pattern.
 *
 * Treating the bits as an integer, a right shift by one halves the exponent,
 * which is what a square root does to it. The exponent is stored with a bias
 * of 127, so the bias is added once more before the shift; after halving, the
 * result then carries exactly one bias again.
 *
 * The result is exact for even powers of two (1, 4, 16, ...) and drifts in
 * between: 2.0f yields 1.5f, roughly 6% above the true value.
 *
 * Zero and negative inputs return 0. Without that guard the bit trick would
 * turn them into unrelated positive values (-1.0f would come out as +infinity).
 * NaN and infinity are not given any meaningful treatment.
 *
 * Assumes `float` is a 32-bit IEEE-754 value.
 */
static inline float fast_sqrt(float x)
{
    if (x <= 0.0f)
        return 0.0f;

    union {
        float f;
        uint32_t u;
    } pun = {.f = x};

    pun.u += UINT32_C(127) << 23;   /* add the exponent bias a second time */
    pun.u >>= 1;                    /* halve the exponent (and the mantissa) */

    return pun.f;
}
static inline t_fvec2 fast_cossine(float a) {
const float s = sinf(a);
return (t_fvec2){
.x = fast_sqrt(1.0f - s * s) * (a >= (float)M_PI_2 && a < (float)(M_PI + M_PI_2) ? -1 : 1),
.y = s
};
}
#endif #endif