Compare commits
14 Commits
d6a3f7465d
...
master
Author | SHA1 | Date | |
---|---|---|---|
ff583a3724 | |||
6f979a1905 | |||
b8c5be8052 | |||
5555808095 | |||
2ea2ce8e54 | |||
13d46ad901 | |||
7972becddd | |||
16c045ba97 | |||
7f0d22e5dc | |||
a0ee2169e5 | |||
aa2f7a45b8 | |||
d1fe7a9230 | |||
12b32acb2b | |||
b06759de76 |
BIN
articles/circle-rasterization/.static/circles.webp
Normal file
BIN
articles/circle-rasterization/.static/circles.webp
Normal file
Binary file not shown.
After Width: | Height: | Size: 15 KiB |
84
articles/circle-rasterization/page.mmd
Normal file
84
articles/circle-rasterization/page.mmd
Normal file
@ -0,0 +1,84 @@
|
||||
Title: Circle Rasterization
|
||||
Brief: Investigation on fast grid-aligned circle rasterization.
|
||||
Date: 1737757212
|
||||
Tags: Programming, Optimization, C
|
||||
CSS: /style.css
|
||||
|
||||

|
||||
|
||||
I'm currently drastically overthinking anything related to my dream Minecraft-like game,
|
||||
and today it was all about chunk loading. In particular, the ideal way to infer which chunks
|
||||
should be loaded based on distance to the viewer, instead of typical direct grid.
|
||||
|
||||
For that, circle rasterization is needed. I came up with the following pieces of code: one reusable macro,
|
||||
and others are meant to be directly copy pasted where needed:
|
||||
|
||||
Macro:
|
||||
```c
|
||||
/* Emits `x` and `y` for every intersecting cell */
|
||||
/* We snap position to the nearest corner, which means there's no aliasing */
|
||||
/* It works great for integer radii */
|
||||
#define m_iter_circle_pixels(p_center_x, p_center_y, p_radius) \
|
||||
for (float y = (p_center_y + ceilf(p_radius)) - 1; y > (p_center_y - ceilf(p_radius)) - 1; --y) \
|
||||
for (float x = p_center_x - ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); \
|
||||
x < p_center_x + ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); ++x)
|
||||
```
|
||||
|
||||
Floating point based one:
|
||||
```c
|
||||
float const rs = state->r * state->r;
|
||||
float const cr = ceilf(state->r);
|
||||
for (float iy = -cr; iy <= cr - 1; ++iy) {
|
||||
float const dx = ceilf(sqrtf(rs - (iy + (iy <= 0)) * (iy + (iy <= 0))));
|
||||
for (float ix = -dx; ix < dx; ++ix) {
|
||||
/* iy and ix are floating point offsets from (0, 0) */
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Integer math based one:
|
||||
```c
|
||||
/* Neat shorthand making integer based loops drastically faster */
|
||||
/* Returns the smallest integer `res >= 1` such that `res * res >= n`, */
/* i.e. ceil(sqrt(n)) for n >= 1; any n <= 1 yields 1. */
/* Linear search: cost grows with sqrt(n), which is fine for small radii. */
static int32_t ceil_sqrt(int32_t const n) {
    int32_t res = 1;
    /* The square is formed in 64 bits: for n above 46340^2 the 32-bit */
    /* product would overflow, which is undefined behaviour for signed ints. */
    #pragma clang loop unroll_count(8)
    while ((int64_t)res * res < n)
        res++;
    return res;
}
|
||||
|
||||
/* This one beats the float in raw performance, but might scale worse at increasing radii, assuming sqrt is a hardware intrinsic with known worst time */
|
||||
int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
|
||||
for (int32_t iy = -(int32_t)state->r; iy <= (int32_t)state->r - 1; ++iy) {
|
||||
int32_t const dx = ceil_sqrt(rsi - (iy + (iy <= 0)) * (iy + (iy <= 0)));
|
||||
for (int32_t ix = -dx; ix < dx; ++ix) {
|
||||
/* iy and ix are integer offsets from (0, 0) */
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Integer math based with accumulated ceil(sqrt()), the fastest I could come up with:
|
||||
```c
|
||||
int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
|
||||
int32_t acc = 1;
|
||||
for (int32_t iy = (int32_t)state->r - 1; iy >= 0; --iy) {
|
||||
while (acc * acc < rsi - iy * iy) acc++;
|
||||
for (int32_t ix = -acc; ix < acc; ++ix) {
|
||||
/* lower portion */
|
||||
x = (float)ix;
|
||||
y = (float)iy;
|
||||
/* upper portion */
|
||||
x = (float)ix;
|
||||
y = (float)-iy - 1;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Note that they assume center point at coordinate origin, quadrant symmetry and whole number radii.
|
||||
|
||||
Benchmarks:
|
||||
```
|
||||
Profile 'float' on average took: 0.001537s, worst case: 0.003272s, sample count: 277
|
||||
Profile 'int32_t' on average took: 0.000726s, worst case: 0.002293s, sample count: 277
|
||||
Profile 'int32_t acc' on average took: 0.000650s, worst case: 0.001732s, sample count: 277
|
||||
```
|
62
articles/fast-quad-rotation/page.mmd
Normal file
62
articles/fast-quad-rotation/page.mmd
Normal file
@ -0,0 +1,62 @@
|
||||
Title: Fast Quad Rotation
|
||||
Brief: A better way to rotate quads around their centers.
|
||||
Date: 1722126213
|
||||
Tags: Programming, Optimization, C
|
||||
CSS: /style.css
|
||||
|
||||
A trick similar in essence to [by pi rotation](/articles/vector-pi-rotation.html), but with the delta calculated
|
||||
for some corner which is reused later with negation and coordinate swap.
|
||||
|
||||
Additionally `cos(a) = sqrt(1 - sin(a) ^ 2)` is used to reuse the result of sin(a),
|
||||
with `fast_sqrt()` for good measure.
|
||||
|
||||
### Code ###
|
||||
```c
|
||||
/* http://www.azillionmonkeys.com/qed/sqroot.html */
|
||||
/* Approximate square root done purely on the IEEE-754 bit pattern: */
/* add the exponent bias (127 << 23) to the raw bits, then halve the whole word. */
/* Exact for even powers of two (1, 4, 16, ...); only an approximation elsewhere. */
static inline float fast_sqrt(float x)
{
    union {
        float f;
        uint32_t u;
    } bits;

    bits.f = x;
    bits.u = (bits.u + (127u << 23)) >> 1;

    return bits.f;
}
|
||||
|
||||
/* instead of calculating cosf again, - use sinf result */
|
||||
static inline t_fvec2 fast_cossine(float a) {
|
||||
const float s = sinf(a);
|
||||
return (t_fvec2){
|
||||
.x = fast_sqrt(1.0f - s * s) *
|
||||
(a >= (float)M_PI_2 && a < (float)(M_PI + M_PI_2) ? -1 : 1),
|
||||
.y = s
|
||||
};
|
||||
}
|
||||
|
||||
/* final vertex calculation */
|
||||
const t_fvec2 t = fast_cossine(sprite.rotation + (float)M_PI_4);
|
||||
|
||||
/* scaling by `M_SQRT1_2` is there to retain the quad size (Pythagorean stuffs). */
|
||||
const t_fvec2 d = {
|
||||
.x = t.x * sprite.rect.w * (float)M_SQRT1_2,
|
||||
.y = t.y * sprite.rect.h * (float)M_SQRT1_2,
|
||||
};
|
||||
|
||||
const t_fvec2 c = frect_center(sprite.rect);
|
||||
|
||||
/* upper-left */
|
||||
const t_fvec2 v0 = { c.x - d.x, c.y - d.y };
|
||||
|
||||
/* bottom-left */
|
||||
const t_fvec2 v1 = { c.x - d.y, c.y + d.x };
|
||||
|
||||
/* bottom-right */
|
||||
const t_fvec2 v2 = { c.x + d.x, c.y + d.y };
|
||||
|
||||
/* upper-right */
|
||||
const t_fvec2 v3 = { c.x + d.y, c.y - d.x };
|
||||
|
||||
```
|
@ -51,6 +51,7 @@ CSS: /style.css
|
||||
- [Occlusion culling for terrain](https://www.researchgate.net/publication/248358913_Voxel_Column_Culling_Occlusion_Culling_For_Large_Terrain_Models)
|
||||
- [Billboard quad transformation optimization](https://gamedev.stackexchange.com/questions/201963/efficient-calculation-of-billboard-sprite-transformations)
|
||||
- [NVidia bindless extensions](https://developer.download.nvidia.com/opengl/tutorials/bindless_graphics.pdf)
|
||||
- [hacksoflife blog, full of good things](http://hacksoflife.blogspot.com/search/label/OpenGL)
|
||||
|
||||
## technical stuff
|
||||
- [Determinism between opengl vendors](https://stackoverflow.com/questions/7922526/opengl-deterministic-rendering-between-gpu-vendor)
|
||||
@ -59,13 +60,15 @@ CSS: /style.css
|
||||
- [Line and circle rasterization](http://www.sunshine2k.de/coding/java/Bresenham/RasterisingLinesCircles.pdf)
|
||||
- [Occlusion culling of Vintage Story](https://github.com/tyronx/occlusionculling)
|
||||
- [Minecraft work on cave occlusion, in 2 parts](https://tomcc.github.io/2014/08/31/visibility-1.html)
|
||||
- [Awesome article on hashtables](https://thenumb.at/Hashtables/)
|
||||
- [Order independent blending technique](https://jcgt.org/published/0002/02/09/)
|
||||
- [High performance voxel engine](https://nickmcd.me/2021/04/04/high-performance-voxel-engine/)
|
||||
- [Monotone meshing](https://blackflux.wordpress.com/tag/monotone-meshing/)
|
||||
- [Capsule collision detection](https://wickedengine.net/2020/04/26/capsule-collision-detection/)
|
||||
- [Forsyth vertex cache optimization](https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html)
|
||||
- [Depth buffer based lighting](https://www.researchgate.net/publication/320616607_Eye-Dome_Lighting_a_non-photorealistic_shading_technique)
|
||||
- [Computational Geometry in C (Second Edition)](http://www.science.smith.edu/~jorourke/books/compgeom.html)
|
||||
- [OpenGL FAQ](https://www.opengl.org/archives/resources/faq/technical/)
|
||||
- [SGI BSP FAQ](https://web.archive.org/web/20010614072959/http://reality.sgi.com/bspfaq/)
|
||||
|
||||
## generational stuff
|
||||
- [Domain warping](https://iquilezles.org/articles/warp/)
|
||||
@ -80,7 +83,6 @@ CSS: /style.css
|
||||
## notable extensions
|
||||
- [Vertex array locking](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_compiled_vertex_array.txt)
|
||||
- [Packed pixels](https://people.freedesktop.org/~marcheu/extensions/EXT/packed_pixels.html)
|
||||
- [Provoking vertex](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_provoking_vertex.txt)
|
||||
- [Framebuffer fetch](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_shader_framebuffer_fetch.txt)
|
||||
- [Integer textures](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_integer.txt)
|
||||
- [Texture swizzle](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_swizzle.txt)
|
||||
@ -97,4 +99,10 @@ CSS: /style.css
|
||||
- [Shader inter group communication](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_shader_ballot.txt)
|
||||
- [Granular buffer memory control](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_sparse_buffer.txt)
|
||||
- [Window pos](https://people.freedesktop.org/~marcheu/extensions/ARB/window_pos.html)
|
||||
- [Optimized fixed function fog](https://people.freedesktop.org/~marcheu/extensions/doc/fog_coord.html)
|
||||
- [No perspective interpolation for screen aligned geometry](https://registry.khronos.org/OpenGL/extensions/NV/NV_shader_noperspective_interpolation.txt)
|
||||
|
||||
## data representations
|
||||
- [Efficient varying-length integers](https://john-millikin.com/vu128-efficient-variable-length-integers)
|
||||
- [Awesome article on hashtables](https://thenumb.at/Hashtables/)
|
||||
- [Crit-bit trees](https://cr.yp.to/critbit.html)
|
||||
- [QP tries](https://dotat.at/prog/qp/README.html)
|
||||
|
@ -52,6 +52,10 @@ struct sqrtwave {
|
||||
} v;
|
||||
} init_sqrtwave(float frequency, float phase, float amplitude) {
|
||||
struct sqrtwave r;
|
||||
union {
|
||||
float f;
|
||||
uint32_t u;
|
||||
} v, a;
|
||||
r.w = init_sinewave(frequency, phase, 1.f);
|
||||
v.f = r.w.s;
|
||||
a.f = amplitude;
|
||||
|
90
articles/rodata-lookup-caching/page.mmd
Normal file
90
articles/rodata-lookup-caching/page.mmd
Normal file
@ -0,0 +1,90 @@
|
||||
Title: Lookup Caching by .rodata Section String Inference
|
||||
Brief: Rather hacky, but working way of string key lookup acceleration.
|
||||
Date: 1722127090
|
||||
Tags: Programming, Optimization, C, Linux
|
||||
CSS: /style.css
|
||||
|
||||
While working on our immediate no-state engine, the need for texture lookup optimization arose.
|
||||
API is designed in a way where every single pushed triangle means resolution of texture by path.
|
||||
|
||||
My insane mind then came up with this optimization: detect whether a given path pointer is in .rodata and, if so,
|
||||
just lookup by hash of the pointer itself, not whole varying-size string. Constant time and all that.
|
||||
|
||||
For that I ended up writing a limited ELF parsing routine that expects `/proc/self/exe`.
|
||||
Virtual address space randomization was tricky until I realized that
|
||||
`getauxval(AT_ENTRY) - ehdr.e_entry` could be used to get the base process address.
|
||||
|
||||
After the section bounds are known, it's as simple as checking `ptr >= vm_start && ptr < vm_end`.
|
||||
|
||||
### Code ###
|
||||
```c
|
||||
/* code is fully self-contained, feel free to use it :) */
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <elf.h>
|
||||
#include <linux/limits.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Reads exactly `len` bytes from `fd` at absolute offset `off` into `dst`. */
/* Returns false on a read error or if the file ends before `len` bytes. */
static bool read_exact_at(int fd, void *dst, size_t len, off_t off)
{
    char *out = dst;

    while (len > 0) {
        ssize_t got = pread(fd, out, len, off);
        if (got <= 0)
            return false;
        out += got;
        off += got;
        len -= (size_t)got;
    }

    return true;
}

/* Finds the section called `name` in the running executable (resolved through */
/* `/proc/self/exe`) and stores its in-memory range into `*vm_start` / `*vm_end` */
/* (end is exclusive). */
/* Returns true when the section was found; on false the outputs are untouched. */
/* Only 64-bit ELF images are supported. */
bool infer_elf_section_bounds(const char *const restrict name,
                              const char **restrict vm_start,
                              const char **restrict vm_end)
{
    bool result = false;
    char *sh = NULL;

    /* readlink() doesn't write a terminator and may fill the whole buffer, */
    /* so one byte is reserved for it */
    char buf[PATH_MAX];
    ssize_t l = readlink("/proc/self/exe", buf, sizeof buf - 1);
    if (l == -1)
        goto ERR_CANT_READLINK;
    buf[l] = 0;

    int elf = open(buf, O_RDONLY);
    if (elf == -1)
        goto ERR_CANT_OPEN_SELF;

    /* elf header */
    Elf64_Ehdr ehdr;
    if (!read_exact_at(elf, &ehdr, sizeof ehdr, 0))
        goto ERR_NOT_ELF;
    if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
        ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
        ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
        ehdr.e_ident[EI_MAG3] != ELFMAG3)
        goto ERR_NOT_ELF;

    /* everything below assumes the 64-bit layout and a sane string table index */
    if (ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
        ehdr.e_shentsize != sizeof (Elf64_Shdr) ||
        ehdr.e_shstrndx >= ehdr.e_shnum)
        goto ERR_NOT_ELF;

    /* section header string table */
    Elf64_Shdr shstrdr;
    if (!read_exact_at(elf, &shstrdr, sizeof shstrdr,
                       (off_t)(ehdr.e_shoff + ehdr.e_shstrndx * sizeof (Elf64_Shdr))))
        goto ERR_NOT_ELF;
    if (shstrdr.sh_size == 0)
        goto ERR_NOT_ELF;

    sh = malloc(shstrdr.sh_size);
    if (sh == NULL)
        goto ERR_NOT_ELF;
    if (!read_exact_at(elf, sh, shstrdr.sh_size, (off_t)shstrdr.sh_offset))
        goto ERR_NOT_ELF;
    /* a string table must end with a terminator, otherwise strcmp() could overrun */
    if (sh[shstrdr.sh_size - 1] != 0)
        goto ERR_NOT_ELF;

    /* walk sections searching for needed name */
    for (size_t s = 0; s < ehdr.e_shnum; ++s) {
        Elf64_Shdr shdr;
        if (!read_exact_at(elf, &shdr, sizeof shdr,
                           (off_t)(ehdr.e_shoff + s * sizeof shdr)))
            break;

        /* skip headers whose name offset points outside the string table */
        if (shdr.sh_name >= shstrdr.sh_size)
            continue;

        if (strcmp(&sh[shdr.sh_name], name) == 0) {
            /* difference between the runtime and link-time entry points */
            /* gives the load bias applied by address space randomization */
            const unsigned long bias = getauxval(AT_ENTRY) - ehdr.e_entry;
            const char *const start = (const char *)(bias + shdr.sh_addr);

            result = true;
            *vm_start = start;
            *vm_end = start + shdr.sh_size;
            break;
        }
    }

ERR_NOT_ELF:
    free(sh); /* free(NULL) is a no-op on the early-exit paths */
    close(elf);

ERR_CANT_OPEN_SELF:
ERR_CANT_READLINK:
    return result;
}
|
||||
|
||||
```
|
@ -34,3 +34,12 @@ logo = "/logo.png"
|
||||
## Language specifier, used in RSS feed.
|
||||
##
|
||||
language = "en"
|
||||
|
||||
## Port that is used to listen to remote git push signals.
|
||||
##
|
||||
webhook_port = 14032
|
||||
|
||||
## Something that only git hosting and your server should know.
|
||||
## See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization
|
||||
##
|
||||
webhook_auth = "Basic you-secure-credentials"
|
||||
|
@ -3,6 +3,7 @@
|
||||
set +e
|
||||
|
||||
git submodule update --init --recursive
|
||||
git-lfs fetch
|
||||
|
||||
(cd tools/mmd && make release)
|
||||
(cd tools/mmd/build && make)
|
||||
|
3
remote_host.sh
Executable file
3
remote_host.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
python3 -m http.server --directory ./html/ & python3 ./tools/git_webhook.py
|
56
tools/git_webhook.py
Normal file
56
tools/git_webhook.py
Normal file
@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from http.client import parse_headers
|
||||
from http.server import BaseHTTPRequestHandler
|
||||
from http.server import HTTPServer
|
||||
|
||||
import subprocess
|
||||
|
||||
import config
|
||||
|
||||
## Simple way to automatically pull and recompile on a remote server.
|
||||
## Run this from the root directory.
|
||||
##
|
||||
## Currently supports:
|
||||
## - Gitea (Via GET method).
|
||||
##
|
||||
|
||||
|
||||
class HttpHandler(BaseHTTPRequestHandler):
    """Webhook endpoint that pulls and recompiles the site on a Gitea push."""

    def do_GET(self):
        """Handle a webhook request.

        Replies 400 unless the request carries ``X-Gitea-Event: push`` and an
        ``Authorization`` header equal to ``config.webhook_auth``. Otherwise
        replies 200 and then runs ``git pull`` followed by ``./compile.sh``.
        """
        import hmac

        # Message.get() matches header names case-insensitively, so the
        # check still works if the sender or a proxy changes the casing.
        got_gitea_push_event = self.headers.get("X-Gitea-Event") == "push"

        # Constant-time comparison avoids leaking the secret through timing.
        supplied_auth = self.headers.get("Authorization", "")
        got_auth = hmac.compare_digest(
            supplied_auth.encode("utf-8"),
            config.webhook_auth.encode("utf-8"),
        )

        if not (got_gitea_push_event and got_auth):
            self._reply(400)
            return

        # todo: This way of doing it blocks both parties. Not ideal.
        self._reply(200)

        subprocess.run(["git", "pull"])
        subprocess.run(["./compile.sh"])

        print("Pulled and recompiled.")

    def _reply(self, status):
        """Send a complete, body-less response with the given status code."""
        self.send_response(status)
        self.send_header("Content-Length", "0")
        # Without end_headers() the buffered status line is never written.
        self.end_headers()
|
||||
|
||||
|
||||
def run(server_class=HTTPServer, handler_class=HttpHandler):
    """Serve webhook requests on ``config.webhook_port`` until interrupted.

    Binds to all interfaces. Ctrl-C (KeyboardInterrupt) stops the server
    quietly; any other exception propagates to the caller.
    """
    server_address = ('', config.webhook_port)
    httpd = server_class(server_address, handler_class)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        # Release the listening socket on every exit path, not only Ctrl-C.
        httpd.server_close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
Reference in New Issue
Block a user