Compare commits

...

8 Commits

Author SHA1 Message Date
ff583a3724 article: circle-rasterization 2025-01-25 02:32:05 +03:00
6f979a1905 add link 2024-10-14 14:55:56 +03:00
b8c5be8052 prepare.sh: add git-lfs fetch 2024-09-03 14:13:03 +03:00
5555808095 introduce dichotomy of local and remote host 2024-09-03 13:28:23 +03:00
2ea2ce8e54 git_webhook.py: rewrite comment to reflect the reality 2024-09-03 13:27:46 +03:00
13d46ad901 gitea webhook service 2024-09-03 13:22:49 +03:00
7972becddd update links 2024-07-30 23:28:40 +03:00
16c045ba97 update 2024-07-30 23:28:33 +03:00
9 changed files with 192 additions and 6 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@ -0,0 +1,84 @@
Title: Circle Rasterization
Brief: Investigation on fast grid-aligned circle rasterization.
Date: 1737757212
Tags: Programming, Optimization, C
CSS: /style.css
![](/articles/circle-rasterization/circles.webp)
I am currently drastically overthinking everything related to my dream Minecraft-like game,
and today it was all about chunk loading. In particular, the ideal way to infer which chunks
should be loaded based on their distance to the viewer, instead of the typical direct grid.
For that, circle rasterization is needed. I came up with the following pieces of code: one reusable macro,
and others that are meant to be copy-pasted directly where needed:
Macro:
```c
/* Emits `x` and `y` for every intersecting cell */
/* We snap position to the nearest corner, which means there's no aliasing */
/* It works great for integer radii */
#define m_iter_circle_pixels(p_center_x, p_center_y, p_radius) \
for (float y = (p_center_y + ceilf(p_radius)) - 1; y > (p_center_y - ceilf(p_radius)) - 1; --y) \
for (float x = p_center_x - ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); \
x < p_center_x + ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); ++x)
```
Floating point based one:
```c
/* Row-by-row walk over the cells of a circle of radius state->r centered at (0, 0). */
float const radius_sq = state->r * state->r;
float const reach = ceilf(state->r);
for (float iy = -reach; iy <= reach - 1; ++iy) {
    /* Rows at or below zero measure from their upper edge, hence the +1. */
    float const edge = iy + (iy <= 0);
    float const half_width = ceilf(sqrtf(radius_sq - edge * edge));
    for (float ix = -half_width; ix < half_width; ++ix) {
        /* iy and ix are floating point offsets from (0, 0) */
    }
}
```
Integer math based one:
```c
/* Neat shorthand making integer based loops drastically faster */
/* Returns the smallest positive integer whose square is >= n. */
/* Inputs of 1 or less (including zero and negatives) yield 1. */
/* The search is linear, so the cost grows with sqrt(n). */
static int32_t ceil_sqrt(int32_t const n) {
    int32_t res = 1;
    /* Square in 64 bits: for n above 46340 * 46340 the last candidate (46341) */
    /* has a square that does not fit in int32_t, and signed overflow is undefined. */
#pragma clang loop unroll_count(8)
    while ((int64_t)res * res < n)
        res++;
    return res;
}
/* This one beats the float in raw performance, but might scale worse at increasing radii, assuming sqrt is a hardware intrinsic with known worst time */
int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
for (int32_t iy = -(int32_t)state->r; iy <= (int32_t)state->r - 1; ++iy) {
int32_t const dx = ceil_sqrt(rsi - (iy + (iy <= 0)) * (iy + (iy <= 0)));
for (int32_t ix = -dx; ix < dx; ++ix) {
/* iy and ix are integer offsets from (0, 0) */
}
}
```
Integer math based with accumulated ceil(sqrt()), the fastest I could come up with:
```c
/* Squared radius; the cast drops any fractional part of state->r. */
int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
/* Running half-width of the current row. It is never reset: rsi - iy * iy */
/* only grows as iy counts down, so the previous value is a valid start. */
int32_t acc = 1;
/* Only rows iy >= 0 are walked; each one is emitted twice, mirrored over the x axis. */
for (int32_t iy = (int32_t)state->r - 1; iy >= 0; --iy) {
    /* Advance acc to the smallest value whose square reaches rsi - iy * iy. */
    while (acc * acc < rsi - iy * iy) acc++;
    for (int32_t ix = -acc; ix < acc; ++ix) {
        /* `x` and `y` are not declared here; the surrounding code must provide them. */
        /* lower portion */
        x = (float)ix;
        y = (float)iy;
        /* upper portion */
        x = (float)ix;
        y = (float)-iy - 1;
    }
}
```
Note that they assume the center point is at the coordinate origin, quadrant symmetry, and whole-number radii.
Benchmarks:
```
Profile 'float' on average took: 0.001537s, worst case: 0.003272s, sample count: 277
Profile 'int32_t' on average took: 0.000726s, worst case: 0.002293s, sample count: 277
Profile 'int32_t acc' on average took: 0.000650s, worst case: 0.001732s, sample count: 277
```

View File

@ -7,16 +7,46 @@ CSS: /style.css
A similar in essence trick to [by pi rotation](/articles/vector-pi-rotation.html), but with delta calculated A similar in essence trick to [by pi rotation](/articles/vector-pi-rotation.html), but with delta calculated
for some corner which is reused later with negation and coordinate swap. for some corner which is reused later with negation and coordinate swap.
Scaling by `M_SQRT1_2` is there to retain the quad size (Pythagorean stuffs). Additionally `cos(a) = sqrt(1 - sin(a) ^ 2)` is used to reuse the result of sin(a),
with `fast_sqrt()` for good measure.
### Code ### ### Code ###
```c ```c
const t_fvec2 c = frect_center(sprite.rect); /* http://www.azillionmonkeys.com/qed/sqroot.html */
/* Cheap square-root approximation done purely on the bit pattern of `x`: */
/* add 127 << 23 to the raw bits, then halve the result. */
/* Assumes `float` is a 32-bit IEEE-754 value, where 127 << 23 is the */
/* exponent bias in place - TODO confirm for the target platform. */
/* Only meant for non-negative inputs; the result is an estimate, not exact. */
static inline float fast_sqrt(float x)
{
    union {
        uint32_t bits;
        float value;
    } pun;
    pun.value = x;
    pun.bits = (pun.bits + (127u << 23)) >> 1;
    return pun.value;
}
/* instead of calculating cosf again, - use sinf result */
static inline t_fvec2 fast_cossine(float a) {
const float s = sinf(a);
return (t_fvec2){
.x = fast_sqrt(1.0f - s * s) *
(a >= (float)M_PI_2 && a < (float)(M_PI + M_PI_2) ? -1 : 1),
.y = s
};
}
/* final vertex calculation */
const t_fvec2 t = fast_cossine(sprite.rotation + (float)M_PI_4);
/* scaling by `M_SQRT1_2` is there to retain the quad size (Pythagorean stuffs). */
const t_fvec2 d = { const t_fvec2 d = {
.x = (cosf(sprite.rotation + (float)M_PI_4) * sprite.rect.w) * (float)M_SQRT1_2, .x = t.x * sprite.rect.w * (float)M_SQRT1_2,
.y = (sinf(sprite.rotation + (float)M_PI_4) * sprite.rect.h) * (float)M_SQRT1_2, .y = t.y * sprite.rect.h * (float)M_SQRT1_2,
}; };
const t_fvec2 c = frect_center(sprite.rect);
/* upper-left */ /* upper-left */
const t_fvec2 v0 = { c.x - d.x, c.y - d.y }; const t_fvec2 v0 = { c.x - d.x, c.y - d.y };

View File

@ -51,6 +51,7 @@ CSS: /style.css
- [Occlusion culling for terrain](https://www.researchgate.net/publication/248358913_Voxel_Column_Culling_Occlusion_Culling_For_Large_Terrain_Models) - [Occlusion culling for terrain](https://www.researchgate.net/publication/248358913_Voxel_Column_Culling_Occlusion_Culling_For_Large_Terrain_Models)
- [Billboard quad transformation optimization](https://gamedev.stackexchange.com/questions/201963/efficient-calculation-of-billboard-sprite-transformations) - [Billboard quad transformation optimization](https://gamedev.stackexchange.com/questions/201963/efficient-calculation-of-billboard-sprite-transformations)
- [NVidia bindless extensions](https://developer.download.nvidia.com/opengl/tutorials/bindless_graphics.pdf) - [NVidia bindless extensions](https://developer.download.nvidia.com/opengl/tutorials/bindless_graphics.pdf)
- [hacksoflife blog, full of good things](http://hacksoflife.blogspot.com/search/label/OpenGL)
## technical stuff ## technical stuff
- [Determinism between opengl vendors](https://stackoverflow.com/questions/7922526/opengl-deterministic-rendering-between-gpu-vendor) - [Determinism between opengl vendors](https://stackoverflow.com/questions/7922526/opengl-deterministic-rendering-between-gpu-vendor)
@ -65,6 +66,9 @@ CSS: /style.css
- [Capsule collision detection](https://wickedengine.net/2020/04/26/capsule-collision-detection/) - [Capsule collision detection](https://wickedengine.net/2020/04/26/capsule-collision-detection/)
- [Forsyth vertex cache optimization](https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html) - [Forsyth vertex cache optimization](https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html)
- [Depth buffer based lighting](https://www.researchgate.net/publication/320616607_Eye-Dome_Lighting_a_non-photorealistic_shading_technique) - [Depth buffer based lighting](https://www.researchgate.net/publication/320616607_Eye-Dome_Lighting_a_non-photorealistic_shading_technique)
- [Computational Geometry in C (Second Edition)](http://www.science.smith.edu/~jorourke/books/compgeom.html)
- [OpenGL FAQ](https://www.opengl.org/archives/resources/faq/technical/)
- [SGI BSP FAQ](https://web.archive.org/web/20010614072959/http://reality.sgi.com/bspfaq/)
## generational stuff ## generational stuff
- [Domain warping](https://iquilezles.org/articles/warp/) - [Domain warping](https://iquilezles.org/articles/warp/)
@ -79,7 +83,6 @@ CSS: /style.css
## notable extensions ## notable extensions
- [Vertex array locking](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_compiled_vertex_array.txt) - [Vertex array locking](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_compiled_vertex_array.txt)
- [Packed pixels](https://people.freedesktop.org/~marcheu/extensions/EXT/packed_pixels.html) - [Packed pixels](https://people.freedesktop.org/~marcheu/extensions/EXT/packed_pixels.html)
- [Provoking vertex](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_provoking_vertex.txt)
- [Framebuffer fetch](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_shader_framebuffer_fetch.txt) - [Framebuffer fetch](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_shader_framebuffer_fetch.txt)
- [Integer textures](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_integer.txt) - [Integer textures](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_integer.txt)
- [Texture swizzle](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_swizzle.txt) - [Texture swizzle](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_swizzle.txt)
@ -96,7 +99,7 @@ CSS: /style.css
- [Shader inter group communication](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_shader_ballot.txt) - [Shader inter group communication](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_shader_ballot.txt)
- [Granular buffer memory control](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_sparse_buffer.txt) - [Granular buffer memory control](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_sparse_buffer.txt)
- [Window pos](https://people.freedesktop.org/~marcheu/extensions/ARB/window_pos.html) - [Window pos](https://people.freedesktop.org/~marcheu/extensions/ARB/window_pos.html)
- [Optimized fixed function fog](https://people.freedesktop.org/~marcheu/extensions/doc/fog_coord.html) - [No perspective interpolation for screen aligned geometry](https://registry.khronos.org/OpenGL/extensions/NV/NV_shader_noperspective_interpolation.txt)
## data representations ## data representations
- [Efficient varying-length integers](https://john-millikin.com/vu128-efficient-variable-length-integers) - [Efficient varying-length integers](https://john-millikin.com/vu128-efficient-variable-length-integers)

View File

@ -34,3 +34,12 @@ logo = "/logo.png"
## Language specifier, used in RSS feed. ## Language specifier, used in RSS feed.
## ##
language = "en" language = "en"
## Port that is used to listen for remote git push signals.
##
webhook_port = 14032
## Something that only git hosting and your server should know.
## See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization
##
webhook_auth = "Basic you-secure-credentials"

View File

@ -3,6 +3,7 @@
set +e set +e
git submodule update --init --recursive git submodule update --init --recursive
git-lfs fetch
(cd tools/mmd && make release) (cd tools/mmd && make release)
(cd tools/mmd/build && make) (cd tools/mmd/build && make)

3
remote_host.sh Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Serve ./html/ over HTTP as a background job...
python3 -m http.server --directory ./html/ &
# ...while the git webhook listener runs in the foreground.
python3 ./tools/git_webhook.py

56
tools/git_webhook.py Normal file
View File

@ -0,0 +1,56 @@
#!/usr/bin/env python3
import hmac
import subprocess
from http.client import parse_headers
from http.server import BaseHTTPRequestHandler
from http.server import HTTPServer

import config
## Simple way to automatically pull and recompile on a remote server.
## Run this from the root directory.
##
## Currently supports:
## - Gitea (Via GET method).
##
class HttpHandler(BaseHTTPRequestHandler):
    """Webhook endpoint that pulls and recompiles on a Gitea push event.

    A request is accepted only when it carries ``X-Gitea-Event: push`` and an
    ``Authorization`` header equal to ``config.webhook_auth``.
    """

    def _reply(self, status):
        """Send a complete, body-less response with the given status code."""
        self.send_response(status)
        # An explicit zero length lets the client finish reading immediately
        # instead of waiting for the connection to close.
        self.send_header("Content-Length", "0")
        # send_response() only buffers the status line and headers;
        # end_headers() is what actually writes them to the client.
        self.end_headers()

    def do_GET(self):
        """Validate the request, then run ``git pull`` and ``./compile.sh``.

        Replies 400 when the event type or the credentials do not match and
        200 otherwise. The 200 is sent before the commands start. The
        commands run in order and stop at the first non-zero exit code.
        """
        # HTTP header names are case-insensitive; the message object's get()
        # honours that, unlike comparing the raw names.
        if self.headers.get("X-Gitea-Event") != "push":
            self._reply(400)
            return

        supplied_auth = self.headers.get("Authorization", "")
        # Constant-time comparison avoids leaking the secret through timing.
        # Both sides are encoded because compare_digest() rejects non-ASCII str.
        if not hmac.compare_digest(
            supplied_auth.encode("utf-8"),
            config.webhook_auth.encode("utf-8"),
        ):
            self._reply(400)
            return

        # todo: This way of doing it blocks both parties. Not ideal.
        self._reply(200)
        for command in (["git", "pull"], ["./compile.sh"]):
            result = subprocess.run(command)
            if result.returncode != 0:
                # Report the failure instead of claiming success.
                print(f"Command {command!r} failed with exit code {result.returncode}.")
                return
        print("Pulled and recompiled.")
def run(server_class=HTTPServer, handler_class=HttpHandler):
    """Serve webhook requests on ``config.webhook_port`` until interrupted.

    Args:
        server_class: Server type to instantiate; it is called with the bind
            address and the handler class.
        handler_class: Request handler type handed to the server.
    """
    # An empty host string binds to every available interface.
    server_address = ("", config.webhook_port)
    httpd = server_class(server_address, handler_class)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the service; exit quietly.
        pass
    finally:
        # Release the listening socket on every exit path, not only on Ctrl+C.
        httpd.server_close()
# Start the webhook listener only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    run()