article: circle-rasterization

add link
prepare.sh: add git-lfs fetch
2025-01-25 02:32:05 +03:00 · 2024-10-14 14:55:56 +03:00 · 2024-09-03 14:13:03 +03:00 · 2024-09-03 13:28:23 +03:00 · 2024-09-03 13:27:46 +03:00 · 2024-09-03 13:22:49 +03:00
12 changed files with 334 additions and 9 deletions
--- a/articles/circle-rasterization/.static/circles.webp
+++ b/articles/circle-rasterization/.static/circles.webp
--- a/articles/circle-rasterization/page.mmd
+++ b/articles/circle-rasterization/page.mmd
@ -0,0 +1,84 @@
 Title:  Circle Rasterization
 Brief:  Investigation on fast grid-aligned circle rasterization.
 Date:   1737757212
 Tags:   Programming, Optimization, C
 CSS:    /style.css
 ![](/articles/circle-rasterization/circles.webp)
 I am currently overthinking everything related to my dream Minecraft-like game,
 and today it was all about chunk loading. In particular, the ideal way to infer which chunks
 should be loaded based on distance to the viewer, instead of the typical direct grid.
 For that, circle rasterization is needed. I came up with the following pieces of code: one reusable macro,
 while the others are meant to be copy-pasted directly where needed:
 Macro:
 ```c
 /* Emits `x` and `y` for every intersecting cell */
 /* We snap position to the nearest corner, which means there's no aliasing */
 /* It works great for integer radii */
 #define m_iter_circle_pixels(p_center_x, p_center_y, p_radius) \
    for (float y = (p_center_y + ceilf(p_radius)) - 1; y > (p_center_y - ceilf(p_radius)) - 1; --y) \
        for (float x = p_center_x - ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); \
                   x < p_center_x + ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); ++x)
 ```
 Floating point based one:
 ```c
 /* Squared radius and the radius rounded up to whole cells, both derived from state->r. */
 float const rs = state->r * state->r;
 float const cr = ceilf(state->r);
 /* One iteration per row of cells, with iy running from -cr up to cr - 1. */
 for (float iy = -cr; iy <= cr - 1; ++iy) {
    /* Half-width of this row; (iy <= 0) adds 1 to iy for rows at or below zero before squaring. */
    float const dx = ceilf(sqrtf(rs - (iy + (iy <= 0)) * (iy + (iy <= 0))));
    for (float ix = -dx; ix < dx; ++ix) {
        /* iy and ix are floating point offsets from (0, 0) */
    }
 }
 ```
 Integer math based one:
 ```c
 /* Neat shorthand making integer based loops drastically faster */
 /* Returns the smallest res >= 1 with res * res >= n, found by linear search; any n <= 1 yields 1. */
 static int32_t ceil_sqrt(int32_t const n) {
    int32_t res = 1;
    /* Square in 64 bits: for n above 46340 * 46340 the 32-bit product would overflow (undefined behaviour) before the loop could stop. */
    #pragma clang loop unroll_count(8)
    while((int64_t)res * res < n)
        res++;
    return res;
 }
 /* This one beats the float in raw performance, but might scale worse at increasing radii, assuming sqrt is a hardware intrinsic with known worst time */
 /* Squared radius in integer math; state->r is truncated by the int32_t cast. */
 int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
 /* One iteration per row of cells, from -r up to r - 1. */
 for (int32_t iy = -(int32_t)state->r; iy <= (int32_t)state->r - 1; ++iy) {
    /* Half-width of this row; (iy <= 0) adds 1 to iy for rows at or below zero before squaring. */
    int32_t const dx = ceil_sqrt(rsi - (iy + (iy <= 0)) * (iy + (iy <= 0)));
    for (int32_t ix = -dx; ix < dx; ++ix) {
        /* iy and ix are integer offsets from (0, 0) */
    }
 }
 ```
 Integer math based with accumulated ceil(sqrt()), the fastest I could come up with:
 ```c
 /* Squared radius in integer math; state->r is truncated by the int32_t cast. */
 int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
 /* Running half-width. It is never reset: iy only decreases, so rsi - iy * iy only grows and acc can resume where it stopped. */
 int32_t acc = 1;
 /* Walk rows from the outermost (r - 1) towards row 0; each iteration serves two mirrored rows. */
 for (int32_t iy = (int32_t)state->r - 1; iy >= 0; --iy) {
    /* Advance acc until acc * acc >= rsi - iy * iy. */
    while (acc * acc < rsi - iy * iy) acc++;
    for (int32_t ix = -acc; ix < acc; ++ix) {
        /* The assignments below stand in for whatever consumes the cell; x and y are declared outside this snippet. */
        /* lower portion */
        x = (float)ix;
        y = (float)iy;
        /* upper portion */
        x = (float)ix;
        y = (float)-iy - 1;
    }
 }
 ```
 Note that they assume center point at coordinate origin, quadrant symmetry and whole number radii.
 Benchmarks:
 ```
 Profile 'float'         on average took: 0.001537s, worst case: 0.003272s, sample count: 277
 Profile 'int32_t'       on average took: 0.000726s, worst case: 0.002293s, sample count: 277
 Profile 'int32_t acc'   on average took: 0.000650s, worst case: 0.001732s, sample count: 277
 ```
--- a/articles/fast-quad-rotation/page.mmd
+++ b/articles/fast-quad-rotation/page.mmd
@ -0,0 +1,62 @@
 Title:  Fast Quad Rotation
 Brief:  A better way to rotate quads around their centers.
 Date:   1722126213
 Tags:   Programming, Optimization, C
 CSS:    /style.css
 A trick similar in essence to [by pi rotation](/articles/vector-pi-rotation.html), but with the delta calculated
 for one corner and then reused for the others via negation and coordinate swap.
 Additionally, `cos(a) = sqrt(1 - sin(a) ^ 2)` is used to reuse the result of `sin(a)`,
 with `fast_sqrt()` for good measure.
 ### Code ###
 ```c
 /* http://www.azillionmonkeys.com/qed/sqroot.html */
 /* Approximates sqrt(x) by reinterpreting the float's bits as an integer: */
 /* adding 127 << 23 and shifting right by one. */
 /* The result is approximate (e.g. 2.0f gives 1.5f, and 0.0f does not give 0). */
 static inline float fast_sqrt(float x)
 {
    union {
        float f;
        uint32_t u;
    } bits;
    bits.f = x;
    bits.u = (bits.u + (127u << 23)) >> 1;
    return bits.f;
 }
 /* instead of calculating cosf again, - use sinf result */
 static inline t_fvec2 fast_cossine(float a) {
    const float s = sinf(a);
    return (t_fvec2){
        .x = fast_sqrt(1.0f - s * s) *
            (a >= (float)M_PI_2 && a < (float)(M_PI + M_PI_2) ? -1 : 1),
        .y = s
    };
 }
 /* final vertex calculation */
 /* t holds cosine (.x) and sine (.y) of the sprite rotation offset by 45 degrees (M_PI_4). */
 const t_fvec2 t = fast_cossine(sprite.rotation + (float)M_PI_4);
 /* scaling by `M_SQRT1_2` is there to retain the quad size (Pythagorean stuffs). */
 /* d is the offset from the center to one corner; the other corners reuse it negated and/or with x and y swapped. */
 /* NOTE(review): d.x is scaled by rect.w and d.y by rect.h, so the swapped corners (v1, v3) look exact only when rect.w == rect.h - confirm for non-square quads. */
 const t_fvec2 d = {
    .x = t.x * sprite.rect.w * (float)M_SQRT1_2,
    .y = t.y * sprite.rect.h * (float)M_SQRT1_2,
 };
 const t_fvec2 c = frect_center(sprite.rect);
 /* upper-left */
 const t_fvec2 v0 = { c.x - d.x, c.y - d.y };
 /* bottom-left */
 const t_fvec2 v1 = { c.x - d.y, c.y + d.x };
 /* bottom-right */
 const t_fvec2 v2 = { c.x + d.x, c.y + d.y };
 /* upper-right */
 const t_fvec2 v3 = { c.x + d.y, c.y - d.x };
 ```
--- a/articles/links-of-the-open-world-programmer/page.mmd
+++ b/articles/links-of-the-open-world-programmer/page.mmd
@ -51,6 +51,7 @@ CSS:    /style.css
 - [Occlusion culling for terrain](https://www.researchgate.net/publication/248358913_Voxel_Column_Culling_Occlusion_Culling_For_Large_Terrain_Models)
 - [Billboard quad transformation optimization](https://gamedev.stackexchange.com/questions/201963/efficient-calculation-of-billboard-sprite-transformations)
 - [NVidia bindless extensions](https://developer.download.nvidia.com/opengl/tutorials/bindless_graphics.pdf)
 - [hacksoflife blog, full of good things](http://hacksoflife.blogspot.com/search/label/OpenGL)
 ## technical stuff
 - [Determinism between opengl vendors](https://stackoverflow.com/questions/7922526/opengl-deterministic-rendering-between-gpu-vendor)
@ -59,13 +60,15 @@ CSS:    /style.css
 - [Line and circle rasterization](http://www.sunshine2k.de/coding/java/Bresenham/RasterisingLinesCircles.pdf)
 - [Occlusion culling of Vintage Story](https://github.com/tyronx/occlusionculling)
 - [Minecraft work on cave occlusion, in 2 parts](https://tomcc.github.io/2014/08/31/visibility-1.html)
 - [Awesome article on hashtables](https://thenumb.at/Hashtables/)
 - [Order independent blending technique](https://jcgt.org/published/0002/02/09/)
 - [High performance voxel engine](https://nickmcd.me/2021/04/04/high-performance-voxel-engine/)
 - [Monotone meshing](https://blackflux.wordpress.com/tag/monotone-meshing/)
 - [Capsule collision detection](https://wickedengine.net/2020/04/26/capsule-collision-detection/)
 - [Forsyth vertex cache optimization](https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html)
 - [Depth buffer based lighting](https://www.researchgate.net/publication/320616607_Eye-Dome_Lighting_a_non-photorealistic_shading_technique)
 - [Computational Geometry in C (Second Edition)](http://www.science.smith.edu/~jorourke/books/compgeom.html)
 - [OpenGL FAQ](https://www.opengl.org/archives/resources/faq/technical/)
 - [SGI BSP FAQ](https://web.archive.org/web/20010614072959/http://reality.sgi.com/bspfaq/)
 ## generational stuff
 - [Domain warping](https://iquilezles.org/articles/warp/)
@ -80,7 +83,6 @@ CSS:    /style.css
 ## notable extensions
 - [Vertex array locking](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_compiled_vertex_array.txt)
 - [Packed pixels](https://people.freedesktop.org/~marcheu/extensions/EXT/packed_pixels.html)
 - [Provoking vertex](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_provoking_vertex.txt)
 - [Framebuffer fetch](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_shader_framebuffer_fetch.txt)
 - [Integer textures](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_integer.txt)
 - [Texture swizzle](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_swizzle.txt)
@ -97,4 +99,10 @@ CSS:    /style.css
 - [Shader inter group communication](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_shader_ballot.txt)
 - [Granular buffer memory control](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_sparse_buffer.txt)
 - [Window pos](https://people.freedesktop.org/~marcheu/extensions/ARB/window_pos.html)
- [Optimized fixed function fog](https://people.freedesktop.org/~marcheu/extensions/doc/fog_coord.html)
+- [No perspective interpolation for screen aligned geometry](https://registry.khronos.org/OpenGL/extensions/NV/NV_shader_noperspective_interpolation.txt)
 ## data representations
 - [Efficient varying-length integers](https://john-millikin.com/vu128-efficient-variable-length-integers)
 - [Awesome article on hashtables](https://thenumb.at/Hashtables/)
 - [Crit-bit trees](https://cr.yp.to/critbit.html)
 - [QP tries](https://dotat.at/prog/qp/README.html)
--- a/articles/oscillators/page.mmd
+++ b/articles/oscillators/page.mmd
@ -51,12 +51,16 @@ struct sqrtwave {
    uint32_t u;
  } v;
 } init_sqrtwave(float frequency, float phase, float amplitude) {
-  struct sqrtwave r;
+    struct sqrtwave r;
-  r.w = init_sinewave(frequency, phase, 1.f);
+  union {
-  v.f = r.w.s;
+    float f;
-  a.f = amplitude;
+    uint32_t u;
-  r.v.u = (a.u & 0x7fffffff) | (v.u & 0x80000000);
+  } v, a;
-  return r;
+    r.w = init_sinewave(frequency, phase, 1.f);
    v.f = r.w.s;
    a.f = amplitude;
    r.v.u = (a.u & 0x7fffffff) | (v.u & 0x80000000);
    return r;
 }
 /* Use floating point bit representation to infer sign, all other bits are set to amplitude */
--- a/articles/rodata-lookup-caching/page.mmd
+++ b/articles/rodata-lookup-caching/page.mmd
@ -0,0 +1,90 @@
 Title:  Lookup Caching by .rodata Section String Inference
 Brief:  Rather hacky, but working way of string key lookup acceleration.
 Date:   1722127090
 Tags:   Programming, Optimization, C, Linux
 CSS:    /style.css
 While working on our immediate no-state engine, the need for texture lookup optimization arose.
 API is designed in a way where every single pushed triangle means resolution of texture by path.
 My insane mind came to such optimization then: detect whether given path pointer is in .rodata and if so, -
 just lookup by hash of the pointer itself, not whole varying-size string. Constant time and all that.
 For that I ended up writing a limited ELF parsing routine that expects `/proc/self/exe`.
 Virtual address space randomization was tricky until I realized that
 `getauxval(AT_ENTRY) - ehdr.e_entry` could be used to get the base process address.
 Once the section bounds are known, it's as simple as checking `vm_start <= ptr && ptr < vm_end`.
 ### Code ###
 ```c
 /* code is fully self-contained, feel free to use it :) */
 #include <fcntl.h>
 #include <unistd.h>
 #include <sys/auxv.h>
 #include <elf.h>
 #include <linux/limits.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 /* Reads exactly `len` bytes from `fd` starting at absolute file offset `off`.
  * Returns false on seek failure, read error or premature end of file. */
 static bool read_exact_at(int fd, Elf64_Off off, void *dst, size_t len)
 {
    unsigned char *out = dst;
    if (lseek(fd, off, SEEK_SET) < 0)
        return false;
    while (len > 0) {
        const ssize_t got = read(fd, out, len);
        if (got <= 0)
            return false;
        out += got;
        len -= (size_t)got;
    }
    return true;
 }

 /* Looks up the section called `name` in the running executable's own ELF64
  * image and reports where it lives in this process.
  *
  * name      NUL-terminated section name, e.g. ".rodata".
  * vm_start  receives the address of the first byte of the section.
  * vm_end    receives the address one past the last byte of the section.
  *
  * Returns true and fills both outputs when the section is found. Returns
  * false and leaves the outputs untouched when the executable cannot be
  * opened, is not a well-formed ELF64 file, or has no such section. */
 bool infer_elf_section_bounds(const char *const restrict name,
                               const char **restrict vm_start,
                               const char **restrict vm_end)
 {
    bool result = false;
    char *names = NULL;
    size_t names_len = 0;
    Elf64_Ehdr ehdr;
    Elf64_Shdr strhdr;

    /* Opening the /proc/self/exe link itself opens the executable, so no
     * readlink() round trip or PATH_MAX sized buffer is needed. */
    const int elf = open("/proc/self/exe", O_RDONLY);
    if (elf == -1)
        goto ERR_CANT_OPEN_SELF;

    /* elf header */
    if (!read_exact_at(elf, 0, &ehdr, sizeof ehdr))
        goto ERR_BAD_ELF;
    if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
            ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
            ehdr.e_shentsize != sizeof (Elf64_Shdr) ||
            ehdr.e_shstrndx >= ehdr.e_shnum)
        goto ERR_BAD_ELF;

    /* section header string table */
    if (!read_exact_at(elf,
                       ehdr.e_shoff + (Elf64_Off)ehdr.e_shstrndx * sizeof strhdr,
                       &strhdr, sizeof strhdr))
        goto ERR_BAD_ELF;
    if (strhdr.sh_size == 0 || strhdr.sh_size >= (size_t)-1)
        goto ERR_BAD_ELF;
    names_len = (size_t)strhdr.sh_size;
    /* One extra byte so that the table is always terminated for strcmp(). */
    names = malloc(names_len + 1);
    if (names == NULL)
        goto ERR_BAD_ELF;
    if (!read_exact_at(elf, strhdr.sh_offset, names, names_len))
        goto ERR_BAD_ELF;
    names[names_len] = '\0';

    /* Runtime entry point minus the entry point recorded in the file gives
     * the offset the image was loaded at (non-zero under ASLR). */
    const unsigned long load_bias = getauxval(AT_ENTRY) - ehdr.e_entry;

    /* walk sections searching for needed name */
    for (size_t s = 0; s < ehdr.e_shnum; ++s) {
        Elf64_Shdr shdr;
        if (!read_exact_at(elf, ehdr.e_shoff + s * sizeof shdr, &shdr, sizeof shdr))
            break;
        /* Skip headers whose name offset points outside the string table. */
        if (shdr.sh_name >= names_len)
            continue;
        if (strcmp(&names[shdr.sh_name], name) == 0) {
            *vm_start = (const char *)(load_bias + shdr.sh_addr);
            *vm_end   = (const char *)(load_bias + shdr.sh_addr + shdr.sh_size);
            result = true;
            break;
        }
    }

 ERR_BAD_ELF:
    free(names);
    close(elf);
 ERR_CANT_OPEN_SELF:
    return result;
 }
 ```
--- a/config.py
+++ b/config.py
@ -34,3 +34,12 @@ logo = "/logo.png"
 ## Language specifier, used in RSS feed.
 ##
 language = "en"
 ## Port that is used to listen for remote git push signals.
 ##
 webhook_port = 14032
 ## Something that only git hosting and your server should know.
 ## See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization
 ##
 webhook_auth = "Basic you-secure-credentials"
--- a/local_host.sh
+++ b/local_host.sh
--- a/prepare.sh
+++ b/prepare.sh
@ -3,6 +3,7 @@
 set +e
 git submodule update --init --recursive
 git-lfs fetch
 (cd tools/mmd && make release)
 (cd tools/mmd/build && make)
--- a/remote_host.sh
+++ b/remote_host.sh
@ -0,0 +1,3 @@
 #!/usr/bin/env bash
 # Starts python's built-in http.server on ./html/ in the background (&),
 # then runs tools/git_webhook.py in the foreground.
 python3 -m http.server --directory ./html/ & python3 ./tools/git_webhook.py
--- a/tools/git_webhook.py
+++ b/tools/git_webhook.py
@ -0,0 +1,56 @@
 #!/usr/bin/env python3
 from http.client import parse_headers
 from http.server import BaseHTTPRequestHandler
 from http.server import HTTPServer
 import subprocess
 import config
 ## Simple way to automatically pull and recompile on a remote server.
 ## Run this from the root directory.
 ##
 ## Currently supports:
 ##  - Gitea (Via GET method).
 ##
 class HttpHandler(BaseHTTPRequestHandler):
    """Webhook endpoint: on an authorised Gitea push event, pull and recompile.

    A GET request is accepted only when it carries `X-Gitea-Event: push` and an
    `Authorization` header equal to `config.webhook_auth`. Anything else is
    answered with 400.
    """

    def _reply(self, code):
        """Send a complete, body-less response with the given status code."""
        self.send_response(code)
        # An explicit zero length lets the client finish without waiting for the
        # connection to close.
        self.send_header("Content-Length", "0")
        # Without end_headers() the buffered status line is never written out.
        self.end_headers()

    def do_GET(self):
        """Validate the webhook request, then run `git pull` and `./compile.sh`."""
        # Function-scope import: only this handler needs it.
        import hmac

        # Lookups on the headers object ignore case, as HTTP header names do.
        event = self.headers.get("X-Gitea-Event")
        supplied_auth = self.headers.get("Authorization")

        got_gitea_push_event = event == "push"
        # Constant-time comparison, since the header is attacker-controlled.
        got_auth = supplied_auth is not None and hmac.compare_digest(
            supplied_auth.encode(), config.webhook_auth.encode())

        if not got_gitea_push_event or not got_auth:
            self._reply(400)
            return

        # todo: This way of doing it blocks both parties. Not ideal.
        self._reply(200)

        pull = subprocess.run(["git", "pull"])
        if pull.returncode != 0:
            print("git pull failed with exit code %d." % pull.returncode)
            return
        build = subprocess.run(["./compile.sh"])
        if build.returncode != 0:
            print("./compile.sh failed with exit code %d." % build.returncode)
            return
        print("Pulled and recompiled.")
 def run(server_class=HTTPServer, handler_class=HttpHandler):
    """Create a server bound to ('', config.webhook_port) and serve until interrupted."""
    listener = server_class(('', config.webhook_port), handler_class)
    try:
        listener.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C ends the loop; close the server before returning.
        listener.server_close()
 if __name__ == "__main__":
    run()
--- a/upload.sh
+++ b/upload.sh
@ -2,6 +2,14 @@
 set +e
 # Handle command line flags. "$@" keeps every argument as its own word;
 # "$*" joins them into a single string, so `--fresh` only matched when it
 # was the sole argument.
 for arg in "$@"
 do
  # --fresh: delete every cached *.upload-checksum file under ./html/
  # (presumably forcing all files to be uploaded again).
  case "$arg" in
    "--fresh") find ./html/ -type f -name '*.upload-checksum' -delete
      ;;
  esac
 done
 for cur in ./html/{*,*/*,*/*/*}; do
  if [ -f "$cur" ] && [[ ! "$cur" == *.upload-checksum ]]; then
    if [ -f "$cur.upload-checksum" ]; then
Author	SHA1	Message	Date
veclavtalica	ff583a3724	article: circle-rasterization	2025-01-25 02:32:05 +03:00
veclavtalica	6f979a1905	add link	2024-10-14 14:55:56 +03:00
veclavtalica	b8c5be8052	prepare.sh: add git-lfs fetch	2024-09-03 14:13:03 +03:00
veclavtalica	5555808095	introduce dichotomy of local and remote host	2024-09-03 13:28:23 +03:00
veclavtalica	2ea2ce8e54	git_webhook.py: rewrite comment to reflect the reality	2024-09-03 13:27:46 +03:00
veclavtalica	13d46ad901	gitea webhook service	2024-09-03 13:22:49 +03:00
veclavtalica	7972becddd	update links	2024-07-30 23:28:40 +03:00
veclavtalica	16c045ba97	update	2024-07-30 23:28:33 +03:00
veclavtalica	7f0d22e5dc	rodata-lookup-caching: fix grammar	2024-07-28 12:33:40 +03:00
veclavtalica	a0ee2169e5	rodata-lookup-caching	2024-07-28 04:06:30 +03:00
veclavtalica	aa2f7a45b8	upload.sh: --fresh flag	2024-07-28 04:06:19 +03:00
veclavtalica	d1fe7a9230	fast-quad-rotation	2024-07-28 03:35:32 +03:00
veclav talica	12b32acb2b	oscillators: fix	2024-06-15 14:30:50 +05:00
veclav talica	b06759de76	links: data structures	2024-06-15 14:30:35 +05:00
		`@ -0,0 +1,3 @@`
							`#!/usr/bin/env bash`

							`python3 -m http.server --directory ./html/ & python3 ./tools/git_webhook.py`