diff --git a/articles/circle-rasterization/.static/circles.webp b/articles/circle-rasterization/.static/circles.webp
new file mode 100644
index 0000000..f3bfd1e
Binary files /dev/null and b/articles/circle-rasterization/.static/circles.webp differ
diff --git a/articles/circle-rasterization/page.mmd b/articles/circle-rasterization/page.mmd
new file mode 100644
index 0000000..9962fb3
--- /dev/null
+++ b/articles/circle-rasterization/page.mmd
@@ -0,0 +1,84 @@
+Title: Circle Rasterization
+Brief: Investigation on fast grid-aligned circle rasterization.
+Date: 1737757212
+Tags: Programming, Optimization, C
+CSS: /style.css
+
+![](/articles/circle-rasterization/circles.webp)
+
+I'm currently drastically overthinking everything related to my dream Minecraft-like game,
+and today it was all about chunk loading. In particular, the ideal way to infer which chunks
+should be loaded based on distance to the viewer, instead of the typical direct grid.
+
+For that, circle rasterization is needed. I came up with the following pieces of code: one reusable macro,
+while the others are meant to be copy-pasted directly where needed:
+
+Macro:
+```c
+/* Emits `x` and `y` for every intersecting cell; use it as a loop header right before the per-cell statement */
+/* We snap position to the nearest corner, which means there's no aliasing; each row is measured at its edge nearest to the center, hence `+ (y < center)` */
+/* It works great for integer radii; arguments are evaluated several times, so pass side-effect-free expressions */
+#define m_iter_circle_pixels(p_center_x, p_center_y, p_radius) \
+  for (float y = ((p_center_y) + ceilf(p_radius)) - 1; y > ((p_center_y) - ceilf(p_radius)) - 1; --y) \
+    for (float x = (p_center_x) - ceilf(sqrtf((p_radius) * (p_radius) - (y - (p_center_y) + (y < (p_center_y))) * (y - (p_center_y) + (y < (p_center_y))))); \
+      x < (p_center_x) + ceilf(sqrtf((p_radius) * (p_radius) - (y - (p_center_y) + (y < (p_center_y))) * (y - (p_center_y) + (y < (p_center_y))))); ++x)
+```
+
+Floating point based one:
+```c
+float const rs = state->r * state->r;
+float const cr = ceilf(state->r);
+for (float iy = -cr; iy <= cr - 1; ++iy) {
+  float const dx = ceilf(sqrtf(rs - (iy + (iy < 0)) * (iy + (iy < 0))));
+  for (float ix = -dx; ix < dx; ++ix) {
+    /* iy and ix are floating point offsets from (0, 0); rows below the origin use their upper edge, rows at or above it use their lower edge */
+  }
+}
+```
+
+Integer math based one:
+```c
+/* Neat shorthand making integer based loops drastically faster: smallest non-negative `res` with res * res >= n (0 for n <= 0); n must not exceed 46340 * 46340 or `res * res` overflows int32_t */
+static int32_t ceil_sqrt(int32_t const n) {
+  int32_t res = 0;
+  #pragma clang loop unroll_count(8)
+  while(res * res < n)
+    res++;
+  return res;
+}
+
+/* This one beats the float version in raw performance, but might scale worse at increasing radii, assuming sqrt is a hardware intrinsic with known worst time */
+int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
+for (int32_t iy = -(int32_t)state->r; iy <= (int32_t)state->r - 1; ++iy) {
+  int32_t const dx = ceil_sqrt(rsi - (iy + (iy < 0)) * (iy + (iy < 0)));
+  for (int32_t ix = -dx; ix < dx; ++ix) {
+    /* iy and ix are integer offsets from (0, 0) */
+  }
+}
+```
+
+Integer math based with accumulated ceil(sqrt()), the fastest I could come up with:
+```c
+int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
+int32_t acc = 1;
+for (int32_t iy = (int32_t)state->r - 1; iy >= 0; --iy) {
+  while (acc * acc < rsi - iy * iy) acc++;
+  for (int32_t ix = -acc; ix < acc; ++ix) {
+    /* lower portion */
+    x = (float)ix;
+    y = (float)iy;
+    /* upper portion: row iy mirrored across the origin, reusing the same half-width `acc` */
+    x = (float)ix;
+    y = (float)-iy - 1;
+  }
+}
+```
+
+Note that they assume the center point is at the coordinate origin, quadrant symmetry, and whole-number radii.
+
+Benchmarks:
+```
+Profile 'float' on average took: 0.001537s, worst case: 0.003272s, sample count: 277
+Profile 'int32_t' on average took: 0.000726s, worst case: 0.002293s, sample count: 277
+Profile 'int32_t acc' on average took: 0.000650s, worst case: 0.001732s, sample count: 277
+```