Compare commits

...

82 Commits

Author SHA1 Message Date
veclavtalica
ff583a3724 article: circle-rasterization 2025-01-25 02:32:05 +03:00
6f979a1905 add link 2024-10-14 14:55:56 +03:00
b8c5be8052 prepare.sh: add git-lfs fetch 2024-09-03 14:13:03 +03:00
5555808095 introduce dichotomy of local and remote host 2024-09-03 13:28:23 +03:00
2ea2ce8e54 git_webhook.py: rewrite comment to reflect the reality 2024-09-03 13:27:46 +03:00
13d46ad901 gitea webhook service 2024-09-03 13:22:49 +03:00
7972becddd update links 2024-07-30 23:28:40 +03:00
16c045ba97 update 2024-07-30 23:28:33 +03:00
7f0d22e5dc rodata-lookup-caching: fix grammar 2024-07-28 12:33:40 +03:00
a0ee2169e5 rodata-lookup-caching 2024-07-28 04:06:30 +03:00
aa2f7a45b8 upload.sh: --fresh flag 2024-07-28 04:06:19 +03:00
d1fe7a9230 fast-quad-rotation 2024-07-28 03:35:32 +03:00
veclav talica
12b32acb2b oscillators: fix 2024-06-15 14:30:50 +05:00
veclav talica
b06759de76 links: data structures 2024-06-15 14:30:35 +05:00
veclav talica
d6a3f7465d fix brief 2024-04-29 17:33:18 +05:00
veclav talica
64e889f3bc articles: links-of-the-open-world-programmer 2024-04-29 17:29:30 +05:00
veclav talica
4f9b7101c6 add tags to old articles 2024-04-21 12:24:46 +05:00
veclav talica
9d1edbc90d move base url specification to config.py 2024-03-31 16:24:01 +05:00
veclav talica
c46ffec7fc uri decoding in browse.sh 2024-03-31 16:16:12 +05:00
veclav talica
2760d11a67 #!/usr/bin/env based python3 resolution 2024-03-31 15:55:42 +05:00
veclav talica
0a5a556f08 css cleaning, convention developing for styles 2024-03-31 15:46:53 +05:00
veclav talica
0577a56127 configurability 2024-03-31 10:38:30 +05:00
veclav talica
bf09a8ffbb cleaningup of widgets 2024-03-30 22:02:10 +05:00
veclav talica
17287cfcb3 pager-based markdown portable browser 2024-02-24 19:15:37 +05:00
veclav talica
6a7a5f091c article listing in plaintext 2024-02-24 11:33:57 +05:00
veclav talica
fc14d99c92 exposed original markdown article files 2024-02-24 11:16:01 +05:00
veclav talica
0d3afee662 raw markdown accessibility 2024-02-23 23:29:42 +05:00
veclav talica
6a5ee8f800 add logo to main page 2024-02-23 23:19:21 +05:00
veclav talica
d2cfd9fd83 fix mixin_tag newlines 2024-02-23 23:18:57 +05:00
veclav talica
af76490365 center images 2024-02-23 23:18:15 +05:00
veclav talica
78b50d27c2 adapt to mobile devices 2024-02-23 22:52:11 +05:00
veclav talica
5601ecb988 proper-er shell usage 2024-02-16 20:25:19 +05:00
veclav talica
9b959408bb remove cook_tracks.sh 2024-02-16 20:10:20 +05:00
veclav talica
0c89da7c3d host.sh: automatically open the page 2024-02-16 20:04:09 +05:00
veclav talica
41b18b8b05 new-article.sh: utility for creation of article stub in cli 2024-02-16 20:00:26 +05:00
veclav talica
d7a434ea11 fix the_line_after_metadata() 2024-02-16 19:59:57 +05:00
veclav talica
c36ce7dd20 tag listings 2024-02-16 18:29:50 +05:00
veclav talica
715ddbc39f rework to-the-toop footer once again 2024-02-16 17:10:59 +05:00
veclav talica
b8b551869f host.sh 2024-02-16 17:05:17 +05:00
veclav talica
c9ae970a59 format style.css 2024-02-16 17:01:58 +05:00
veclav talica
eb7b9f114e .gitignore: ignore __pycache__ folder 2024-02-16 16:54:24 +05:00
veclav talica
18098a6859 use #!/usr/bin/env bash 2024-02-16 16:53:21 +05:00
veclav talica
f1d6e8e0a2 use $CC in make 2024-02-16 16:50:30 +05:00
veclav talica
f56f102530 add new tags 2024-02-16 16:29:54 +05:00
veclav talica
3ac4bc2965 fix links 2024-02-16 15:25:47 +05:00
veclav talica
dba77ef988 firefox-wasm-tail-call-benchmark 2024-02-16 15:13:16 +05:00
veclav talica
0d401e1274 article: build-pytorch-on-old-amd64 2024-02-13 16:32:58 +05:00
veclav talica
f5966dcff8 add mastodon link for verification 2024-02-11 20:49:27 +05:00
veclav talica
a395c3e016 fix url in preview embeds 2024-02-10 22:53:26 +05:00
veclav talica
760f947c00 push .static contents 2024-02-10 22:38:35 +05:00
veclav talica
bb479c9371 neocities-cache article 2024-02-10 22:27:52 +05:00
veclav talica
2ec92d2dee fix bug in main page article linking 2024-02-10 22:19:17 +05:00
veclav talica
dc929f199e remove xm.js 2024-02-10 22:16:07 +05:00
veclav talica
82b4976efb checksum based cache for neocities uploads 2024-02-10 22:15:32 +05:00
veclav talica
c1ccd1b465 modarche.org link in nav 2024-02-10 21:05:19 +05:00
veclav talica
193ba0b4a5 sorted by date main feed 2024-02-10 20:58:08 +05:00
veclav talica
952a773875 new style, dark style 2024-02-10 20:48:22 +05:00
veclav talica
7807619d58 remove tags from main feed so that it's less crowded 2024-02-10 20:24:09 +05:00
veclav talica
3429e09d15 reworked the footer 2024-02-10 20:20:54 +05:00
veclav talica
ecc283d091 delete the article about introduction of xm player 2024-02-10 20:14:07 +05:00
veclav talica
67ce1832a4 purge xm track player page, it was never good 2024-02-10 20:12:59 +05:00
veclav talica
1098e6551d proper nav 2024-02-10 20:09:30 +05:00
veclav talica
efa3391dbd vector-pi-rotation article 2023-11-09 22:05:37 +05:00
veclav talica
f2020f7463 calling convention notice 2023-11-09 21:59:28 +05:00
veclav talica
44b53e4099 update to simd-rect 2023-09-25 16:49:21 +05:00
veclav talica
8e7391d470 simd-rect article 2023-09-24 22:49:51 +05:00
veclav talica
c15ad9b999 remove unrelated 2023-09-24 22:49:39 +05:00
veclav talica
b7727d6aa3 incremental-delaunay article 2023-09-14 22:24:46 +05:00
veclav talica
3d89ab8ce6 percentage 2023-09-14 22:24:18 +05:00
veclav talica
77aabf61c5 hand optimized simplex page 2023-07-10 18:23:11 +05:00
veclav talica
fc88e3eeef visibility 2d article 2023-06-12 17:39:02 +05:00
veclav talica
1324e402b9 update notice message 2023-06-09 16:22:20 +05:00
veclav talica
3fb5570331 mobile friendly layout 2023-05-26 14:00:48 +05:00
veclav talica
d8f227344c fix article data uploads 2023-05-26 00:00:38 +05:00
veclav talica
8b6c913f6a roads-0, .static data 2023-05-25 23:45:44 +05:00
veclav talica
ca7597ee8f uploading script with skip of persistent files 2023-05-25 23:13:18 +05:00
veclav talica
1e37fcebd2 front image used for embed preview 2023-05-25 22:35:39 +05:00
veclav talica
be6ef1f1c2 no warnings from c :) 2023-05-25 22:17:23 +05:00
veclav talica
ba8327c161 opengraph article integration 2023-05-25 21:58:10 +05:00
veclav talica
37f7bf2c11 waveform generation, fixes of walk 2023-05-25 21:18:25 +05:00
veclav talica
1a18af8baf fixes to oscillators article 2023-05-25 19:05:34 +05:00
veclav talica
3dc8c60a99 ignore hidden directories 2023-05-25 19:04:54 +05:00
57 changed files with 2507 additions and 1652 deletions

2
.gitattributes vendored Normal file
View File

@@ -0,0 +1,2 @@
*.png filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text

10
.gitignore vendored
View File

@@ -1,3 +1,9 @@
**/__pycache__/* **/__pycache__/
html/ html/
./.*/
[articles/**/.static/]
articles/**/.dynamic/
articles/**/.temp/
**/*.jpg/
**/*.png/
**/*.upload-checksum

3
.gitmodules vendored
View File

@@ -1,3 +1,6 @@
[submodule "tools/mmd"] [submodule "tools/mmd"]
path = tools/mmd path = tools/mmd
url = https://github.com/fletcher/MultiMarkdown-6 url = https://github.com/fletcher/MultiMarkdown-6
[submodule "tools/gifenc"]
path = tools/gifenc
url = https://github.com/lecram/gifenc

View File

@@ -0,0 +1,178 @@
extends Node
class_name Visibility2D
# Based on: https://www.redblobgames.com/articles/visibility/Visibility.hx
# Limitations:
# - Segments can't intersect each other, splitting is required for such cases.
# todo: Make it extend plain object, handle lifetime manually.
class EndPoint:
	# Position of this end of the segment.
	var point: Vector2
	# Whether this end opens the segment during the sweep; filled in by _finalize().
	var begin: bool
	# Index in `_endpoints` of the first endpoint of the owning segment.
	var segment: int
	# Angle of `point` around the view center; filled in by _finalize().
	var angle: float

	# Comparator for Array.sort_custom(): larger angles come first, and on equal
	# angles an ending endpoint comes before a beginning one.
	static func sort(p_a: EndPoint, p_b: EndPoint) -> bool:
		if p_a.angle > p_b.angle:
			return true
		if p_a.angle < p_b.angle:
			return false
		return p_b.begin and not p_a.begin
var _endpoints: Array # of EndPoint
var _sorted_endpoints: Array # of EndPoint
var _open: PoolIntArray # of Segment indices
var center: Vector2
var output: PoolVector2Array
# todo: Ability to cache builder state for static geometry.
class Builder:
	# Fluent helper that feeds occluding segments into `target`.
	# Every method except finalize() returns the builder itself so calls can be chained.

	# Object receiving the segments; assigned by init_builder().
	var target

	# Sets the point visibility is computed from.
	func view_point(p_point: Vector2) -> Builder:
		target.center = p_point
		return self

	# Adds the four edges of `p_area` as occluding segments.
	# todo: Use it to cull out endpoints out of working region.
	func bounds(p_area: Rect2) -> Builder:
		target._add_segment(p_area.position, Vector2(p_area.end.x, p_area.position.y))
		target._add_segment(Vector2(p_area.end.x, p_area.position.y), p_area.end)
		target._add_segment(p_area.end, Vector2(p_area.position.x, p_area.end.y))
		target._add_segment(Vector2(p_area.position.x, p_area.end.y), p_area.position)
		return self

	# Adds one segment per pair of consecutive points of `p_line`,
	# offset by the node's position.
	func line(p_line: Line2D) -> Builder:
		for i in range(0, p_line.points.size() - 1):
			target._add_segment(p_line.position + p_line.points[i],
					p_line.position + p_line.points[i + 1])
		return self

	# Adds the closed outline of `p_polygon`, offset by the node's position.
	func polygon(p_polygon: Polygon2D) -> Builder:
		var points := p_polygon.polygon
		# An outline needs at least two vertices. Without this guard the closing
		# edge below would index an empty array.
		if points.size() < 2:
			return self
		for i in range(0, points.size() - 1):
			target._add_segment(p_polygon.position + points[i],
					p_polygon.position + points[i + 1])
		# Closing edge from the last vertex back to the first.
		target._add_segment(p_polygon.position + points[points.size() - 1],
				p_polygon.position + points[0])
		return self

	# Dispatches to line() or polygon() by node type; any other type is reported
	# with push_error() and ignored.
	func occluder(p_object: Object) -> Builder:
		if p_object is Line2D:
			return line(p_object)
		elif p_object is Polygon2D:
			return polygon(p_object)
		else:
			push_error("Unknown occluder type")
		return self

	# Ends the chain by calling _finalize() on the target.
	func finalize():
		target._finalize()
# Appends the two ends of a segment to `_endpoints`.
# `begin` and `angle` of the new endpoints are left unset here.
func _add_segment(p_point0: Vector2, p_point1: Vector2):
	# Both ends carry the same id: the index the first end is about to occupy.
	var segment_id := _endpoints.size()
	var head := EndPoint.new()
	head.point = p_point0
	head.segment = segment_id
	var tail := EndPoint.new()
	tail.point = p_point1
	tail.segment = segment_id
	_endpoints.append(head)
	_endpoints.append(tail)
# Drops every previously added endpoint and returns a fresh Builder bound to this object.
func init_builder() -> Builder:
	var builder := Builder.new()
	builder.target = self
	# todo: Reuse
	_endpoints.resize(0)
	return builder
# Computes the angle of every endpoint around `center` and marks which end of
# each segment begins it. Endpoints are stored in pairs, so the array is walked
# two entries at a time.
func _finalize():
	# todo: Only needs to be done when endpoints or center is changed.
	for segment in range(0, _endpoints.size(), 2):
		var p1 := _endpoints[segment] as EndPoint
		var p2 := _endpoints[segment + 1] as EndPoint
		p1.angle = (p1.point - center).angle()
		p2.angle = (p2.point - center).angle()
		# Wrap the angular difference into (-PI, PI]; its sign then decides
		# which end is the beginning.
		# todo: Simplify to one expression.
		var da := p2.angle - p1.angle
		if da <= -PI: da += TAU
		if da > PI: da -= TAU
		p1.begin = da > 0.0
		p2.begin = not p1.begin
# Orders two segments relative to `center`. Both arguments are the index of a
# segment's first endpoint in `_endpoints`.
# Returns true when either
# - both ends of segment 2 (moved 1% inward) and `center` fall on the same side
#   of the line through segment 1, or
# - both ends of segment 1 (moved 1% inward) fall on one side of the line through
#   segment 2 and `center` falls on the other.
# NOTE(review): geometrically both cases read as "segment 1 lies behind segment 2
# as seen from center", which matches how sweep() keeps walking past open
# segments while this returns true - confirm against the function name.
func _is_segment_in_front(p_segment1: int, p_segment2: int) -> bool:
var s1p1 := _endpoints[p_segment1].point as Vector2
var s1p2 := _endpoints[p_segment1 + 1].point as Vector2
var s2p1 := _endpoints[p_segment2].point as Vector2
var s2p2 := _endpoints[p_segment2 + 1].point as Vector2
# todo: Can we use something simpler than interpolation?
# Each flag below is the sign test of the 2D cross product between the segment
# direction `d` and the vector from the segment start to the tested point.
# The tested ends are interpolated 1% toward the other end of their segment.
var d := s1p2 - s1p1
var p := s2p1.linear_interpolate(s2p2, 0.01)
var a1 := (d.x * (p.y - s1p1.y) \
- d.y * (p.x - s1p1.x)) < 0.0
p = s2p2.linear_interpolate(s2p1, 0.01)
var a2 := (d.x * (p.y - s1p1.y) \
- d.y * (p.x - s1p1.x)) < 0.0
var a3 := (d.x * (center.y - s1p1.y) \
- d.y * (center.x - s1p1.x)) < 0.0
if a1 == a2 and a2 == a3: return true
# Same tests with the roles of the two segments swapped.
d = s2p2 - s2p1
p = s1p1.linear_interpolate(s1p2, 0.01)
var b1 := (d.x * (p.y - s2p1.y) \
- d.y * (p.x - s2p1.x)) < 0.0
p = s1p2.linear_interpolate(s1p1, 0.01)
var b2 := (d.x * (p.y - s2p1.y) \
- d.y * (p.x - s2p1.x)) < 0.0
var b3 := (d.x * (center.y - s2p1.y) \
- d.y * (center.x - s2p1.x)) < 0.0
return b1 == b2 and b2 != b3
# Sweeps over the endpoints prepared by _finalize() and returns `output`.
# `output` is emptied first and then receives the intersection points found
# while n_pass == 1.
# NOTE(review): block nesting is not visible in this copy of the file; the
# comments below describe statements in the order they appear.
func sweep() -> PoolVector2Array:
output.resize(0)
# todo: Only duplicate and sort on change.
_sorted_endpoints = _endpoints.duplicate()
_sorted_endpoints.sort_custom(EndPoint, "sort")
var start_angle := 0.0
# todo: Inline passes.
# The endpoints are walked twice; points are only appended when n_pass == 1.
for n_pass in range(2):
# The sorted array is visited from its last element to its first.
for p_idx in range(_sorted_endpoints.size() - 1, -1, -1):
var p := _sorted_endpoints[p_idx] as EndPoint
# Head of `_open` before this endpoint is applied, -1 when nothing is open.
var old := -1 if _open.empty() else _open[0]
if p.begin:
# Walk past every open segment for which _is_segment_in_front() holds and
# insert the new segment at that position.
var idx := 0
while idx < _open.size() and _is_segment_in_front(p.segment, _open[idx]):
idx += 1
# warning-ignore:return_value_discarded
_open.insert(idx, p.segment)
else:
# A segment that ends is dropped from `_open` if it is present.
var idx := _open.rfind(p.segment)
if idx != -1: _open.remove(idx)
# todo: Second pass can assume that it will be found.
# _open.remove(_open.rfind(p.segment))
# The head of `_open` changed.
if old != (-1 if _open.empty() else _open[0]):
if n_pass == 1:
# todo: Distance should be configurable.
# p3 is a point on the old head segment (its first endpoint); when nothing
# was open it is a point 500 units from center along start_angle.
var p3 := _endpoints[old].point as Vector2 if old != -1 else \
center + Vector2(cos(start_angle), sin(start_angle)) * 500.0
var t2 := Vector2(cos(p.angle), sin(p.angle))
# p4 is the direction of the old head segment, or t2 when nothing was open.
var p4 := p3.direction_to(_endpoints[old + 1].point) if old != -1 else t2
# Intersect that line with the lines from center along start_angle and
# along the current endpoint's angle; a null result is skipped.
var l = Geometry.line_intersects_line_2d(p3, p4, center,
Vector2(cos(start_angle), sin(start_angle)))
if l != null: output.append(l)
l = Geometry.line_intersects_line_2d(p3, p4, center, t2)
if l != null: output.append(l)
start_angle = p.angle
_open.resize(0)
return output

BIN
articles/2d-visibility/.static/example.gif (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,133 @@
Title: 2D Visibility
Brief: Visibility triangles from 2D occluding segment geometry in GDScript.
Date: 1686547796
Tags: Programming, Godot, GDScript
CSS: /style.css
![](/articles/2d-visibility/example.gif)
Based on [Redblobgames' visibility article and Haxe reference implementation](https://www.redblobgames.com/articles/visibility)
Full usable code is [here](/articles/2d-visibility/Visibility2D.gd.txt).
### Explanation ###
First step is determining angles for each segment point as well as denoting
which one gets encountered first.
```gdscript
for segment in range(0, _endpoints.size(), 2):
	var p1 := _endpoints[segment] as EndPoint
	var p2 := _endpoints[segment + 1] as EndPoint
	p1.angle = (p1.point - center).angle()
	p2.angle = (p2.point - center).angle()
	# Wrap the angular difference into (-PI, PI]; its sign decides which end begins the segment.
	var da := p2.angle - p1.angle
	if da <= -PI: da += TAU
	if da > PI: da -= TAU
	p1.begin = da > 0.0
	p2.begin = not p1.begin
```
Then points are sorted by angle and beginning:
```gdscript
static func sort(p_a: EndPoint, p_b: EndPoint) -> bool:
if p_a.angle > p_b.angle: return true
elif p_a.angle < p_b.angle: return false
elif not p_a.begin and p_b.begin: return true
else: return false
```
Then in two passes:
- Walk over sorted points.
- When the nearest segment ends, or a nearer one is encountered, remember the starting angle;
  on the second pass only, emit two points representing the visible portion of the segment.
```gdscript
var start_angle := 0.0
for n_pass in range(2):
for p_idx in range(_sorted_endpoints.size() - 1, -1, -1):
var p := _sorted_endpoints[p_idx] as EndPoint
var old := -1 if _open.empty() else _open[0]
if p.begin:
var idx := 0
while idx < _open.size() and _is_segment_in_front(p.segment, _open[idx]):
idx += 1
_open.insert(idx, p.segment)
else:
var idx := _open.rfind(p.segment)
if idx != -1: _open.remove(idx)
if old != (-1 if _open.empty() else _open[0]):
if n_pass == 1:
var p3 := _endpoints[old].point as Vector2 if old != -1 else \
center + Vector2(cos(start_angle), sin(start_angle)) * 500.0
var t2 := Vector2(cos(p.angle), sin(p.angle))
var p4 := p3.direction_to(_endpoints[old + 1].point) if old != -1 else t2
# note: Checks are in case of parallel lines.
var l = Geometry.line_intersects_line_2d(p3, p4, center,
Vector2(cos(start_angle), sin(start_angle)))
if l != null: output.append(l)
l = Geometry.line_intersects_line_2d(p3, p4, center, t2)
if l != null: output.append(l)
start_angle = p.angle
```
Where segment front deciding algorithm is as follows, using cross products:
```gdscript
func _is_segment_in_front(p_segment1: int, p_segment2: int) -> bool:
var s1p1 := _endpoints[p_segment1].point as Vector2
var s1p2 := _endpoints[p_segment1 + 1].point as Vector2
var s2p1 := _endpoints[p_segment2].point as Vector2
var s2p2 := _endpoints[p_segment2 + 1].point as Vector2
var d := s1p2 - s1p1
var p := s2p1.linear_interpolate(s2p2, 0.01)
var a1 := (d.x * (p.y - s1p1.y) \
- d.y * (p.x - s1p1.x)) < 0.0
p = s2p2.linear_interpolate(s2p1, 0.01)
var a2 := (d.x * (p.y - s1p1.y) \
- d.y * (p.x - s1p1.x)) < 0.0
var a3 := (d.x * (center.y - s1p1.y) \
- d.y * (center.x - s1p1.x)) < 0.0
if a1 == a2 and a2 == a3: return true
d = s2p2 - s2p1
p = s1p1.linear_interpolate(s1p2, 0.01)
var b1 := (d.x * (p.y - s2p1.y) \
- d.y * (p.x - s2p1.x)) < 0.0
p = s1p2.linear_interpolate(s1p1, 0.01)
var b2 := (d.x * (p.y - s2p1.y) \
- d.y * (p.x - s2p1.x)) < 0.0
var b3 := (d.x * (center.y - s2p1.y) \
- d.y * (center.x - s2p1.x)) < 0.0
return b1 == b2 and b2 != b3
```
### Usage example ###
Visibility2D.gd class implements builder interface to make it slightly easier to work with.
```gdscript
func _process(_delta):
$Visibility2D.init_builder() \
.view_point(get_global_mouse_position()) \
.bounds(get_viewport_rect()) \
.occluder($Line2D) \
.finalize()
for child in $Cones.get_children():
child.queue_free()
var edges = $Visibility2D.sweep()
for i in range(0, edges.size() - 1, 2):
var polygon := Polygon2D.new()
polygon.polygon = PoolVector2Array([$Visibility2D.center, edges[i], edges[i + 1]])
$Cones.add_child(polygon)
```

View File

@@ -0,0 +1,89 @@
Title: Building Pytorch on an Old Laptop
Brief: Journey of building Pytorch 2.2.0 on a 10+ year old amd64 laptop.
Date: 1707822832
Tags: Compilation, Bash
CSS: /style.css
This started when I was following my first tutorial on Pytorch to generate toponyms (will probably write about it too later).
Just as I got enough courage to run it for the first time I was faced with heartbreaking words, - `Illegal instruction`. Here's the following story:
First I downloaded Pytorch release:
```
git clone --depth 1 https://github.com/pytorch/pytorch --single-branch --branch v2.2.0 pytorch
```
Then struggled for a bit with initialization of submodules, it looks like there are broken links or something???
Anyway, the command is:
```
git submodule update --init --recursive
```
But I had to `rm -fr ./third_party/<> && git rm -fr --cached ./third_party/<>` and then `git submodule add ...` to replace a few libs.
Later by trial and error I came up with this script which excludes every part that demands AVX
as well as anything not necessary, as compilation time of it all is ridiculous:
```bash
#!/bin/sh
# Builds PyTorch from the current checkout in development mode, with optional
# components and the parts that demand AVX switched off.
# Stop at the first failing command.
set -e
# Could comment or override it if you have different compiler you want to use.
export CMAKE_C_COMPILER=clang
export CMAKE_CXX_COMPILER=clang++
export USE_CCACHE=ON
export USE_CUDA=0
export USE_DISTRIBUTED=0
export USE_GLOO=0
export BUILD_TEST=0
export BUILD_CAFFE2=0
export USE_CUDNN=0
export USE_ASAN=0
export USE_KINETO=0
export DEBUG=0
export USE_XNNPACK=0
export USE_QNNPACK=0
# You can comment this, but for me throttling was making it slower than just using a single core.
export MAX_JOBS=1
# Each of the following is set exactly once; later exports would override earlier ones.
export USE_AVX=OFF
export USE_NNPACK=OFF
export USE_MKLDNN=OFF
export USE_FBGEMM=OFF
export C_HAS_AVX_2=OFF
export C_HAS_AVX2_2=OFF
export CXX_HAS_AVX_2=OFF
export CXX_HAS_AVX2_2=OFF
export CAFFE2_COMPILER_SUPPORTS_AVX512_EXTENSIONS=OFF
export USE_NATIVE_ARCH=ON
export USE_VULKAN=OFF
export USE_SYSTEM_ZSTD=ON
# You can install those from your package manager, but only if you're on up-to-date repos. (so, not stable Debian)
# export USE_SYSTEM_PYBIND11=ON
# export USE_SYSTEM_CPUINFO=ON
# export USE_SYSTEM_SLEEF=ON
python3 setup.py develop
```
It installs itself with dependencies on this very git folder, so you better place it somewhere permanent (I ended up `mv` and `ln` it, just to not forget and delete it from ~/tmp/)
Not sure how to clean the temporary stuff properly after installation, it wastes quite a bit of space (1.7 GiB for me).
In the end I just used this, which freed about 400 MiB of space:
```
rm -fr ./third_party
cd ./build && find . -name \*.o -type f -delete
```
That's about it. It's slow but you can play with it for small tasks.

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -0,0 +1,84 @@
Title: Circle Rasterization
Brief: Investigation on fast grid-aligned circle rasterization.
Date: 1737757212
Tags: Programming, Optimization, C
CSS: /style.css
![](/articles/circle-rasterization/circles.webp)
Currently drastically overthinking anything related to dream Minecraft-like game of mine,
and today it was all about chunk loading. Particularly, ideal way to infer which chunks
should be loaded based on distance to the viewer, instead of typical direct grid.
For that circle rasterization is needed. I came up with following pieces of code, one reusable macro,
and others are meant to be directly copy pasted where needed:
Macro:
```c
/* Emits `x` and `y` for every intersecting cell */
/* We snap position to the nearest corner, which means there's no aliasing */
/* It works great for integer radii */
#define m_iter_circle_pixels(p_center_x, p_center_y, p_radius) \
for (float y = (p_center_y + ceilf(p_radius)) - 1; y > (p_center_y - ceilf(p_radius)) - 1; --y) \
for (float x = p_center_x - ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); \
x < p_center_x + ceilf(sqrtf(p_radius * p_radius - (y - p_center_y + (y <= p_center_y)) * (y - p_center_y + (y <= p_center_y)))); ++x)
```
Floating point based one:
```c
float const rs = state->r * state->r;
float const cr = ceilf(state->r);
for (float iy = -cr; iy <= cr - 1; ++iy) {
    /* Measure the row at its edge nearest to the center: iy + 1 for negative rows, iy otherwise. */
    /* Using `iy <= 0` here would measure row 0 at its far edge and drop it for a radius of 1. */
    float const edge = iy + (iy < 0);
    float const dx = ceilf(sqrtf(rs - edge * edge));
    for (float ix = -dx; ix < dx; ++ix) {
        /* iy and ix are floating point offsets from (0, 0) */
    }
}
```
Integer math based one:
```c
/* Neat shorthand making integer based loops drastically faster */
/* Returns the smallest non-negative `res` with `res * res >= n`, i.e. ceil(sqrt(n)); 0 for n <= 0. */
/* Linear search, so it is only meant for small `n`; `res * res` must stay within int32_t. */
static int32_t ceil_sqrt(int32_t const n) {
    int32_t res = 0;
#pragma clang loop unroll_count(8)
    while (res * res < n)
        res++;
    return res;
}
/* This one beats the float in raw performance, but might scale worse at increasing radii, assuming sqrt is a hardware intrinsic with known worst time */
int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
for (int32_t iy = -(int32_t)state->r; iy <= (int32_t)state->r - 1; ++iy) {
    /* Measure the row at its edge nearest to the center: iy + 1 for negative rows, iy otherwise. */
    int32_t const edge = iy + (iy < 0);
    int32_t const dx = ceil_sqrt(rsi - edge * edge);
    for (int32_t ix = -dx; ix < dx; ++ix) {
        /* iy and ix are integer offsets from (0, 0) */
    }
}
```
Integer math based with accumulated ceil(sqrt()), the fastest I could come up with:
```c
/* Squared radius, computed from state->r converted to int32_t. */
int32_t const rsi = (int32_t)state->r * (int32_t)state->r;
/* Running ceil(sqrt(rsi - iy * iy)): the target only grows as iy decreases, so acc is never reset. */
int32_t acc = 1;
/* Only rows iy >= 0 are walked; each one is emitted twice, mirrored to row -iy - 1. */
for (int32_t iy = (int32_t)state->r - 1; iy >= 0; --iy) {
while (acc * acc < rsi - iy * iy) acc++;
for (int32_t ix = -acc; ix < acc; ++ix) {
/* lower portion */
x = (float)ix;
y = (float)iy;
/* upper portion */
x = (float)ix;
y = (float)-iy - 1;
}
}
```
Note that they assume center point at coordinate origin, quadrant symmetry and whole number radii.
Benchmarks:
```
Profile 'float' on average took: 0.001537s, worst case: 0.003272s, sample count: 277
Profile 'int32_t' on average took: 0.000726s, worst case: 0.002293s, sample count: 277
Profile 'int32_t acc' on average took: 0.000650s, worst case: 0.001732s, sample count: 277
```

View File

@@ -0,0 +1,62 @@
Title: Fast Quad Rotation
Brief: A better way to rotate quads around their centers.
Date: 1722126213
Tags: Programming, Optimization, C
CSS: /style.css
A trick similar in essence to [by pi rotation](/articles/vector-pi-rotation.html), but with delta calculated
for some corner which is reused later with negation and coordinate swap.
Additionally `cos(a) = sqrt(1 - sin(a) ^ 2)` is used to reuse the result of sin(a),
with `fast_sqrt()` for good measure.
### Code ###
```c
/* http://www.azillionmonkeys.com/qed/sqroot.html */
/* Approximates sqrt(x) with integer operations on the bit pattern of the float: */
/* `127 << 23` is added to the bits, then the whole pattern is shifted right by one. */
/* NOTE(review): assumes a 32-bit IEEE-754 `float` and non-negative `x`; the error bound is not established here - confirm against the linked page. */
static inline float fast_sqrt(float x)
{
union {
float f;
uint32_t u;
} pun = {.f = x};
pun.u += 127 << 23;
pun.u >>= 1;
return pun.f;
}
/* instead of calculating cosf again, - use sinf result */
static inline t_fvec2 fast_cossine(float a) {
const float s = sinf(a);
return (t_fvec2){
.x = fast_sqrt(1.0f - s * s) *
(a >= (float)M_PI_2 && a < (float)(M_PI + M_PI_2) ? -1 : 1),
.y = s
};
}
/* final vertex calculation */
const t_fvec2 t = fast_cossine(sprite.rotation + (float)M_PI_4);
/* scaling by `M_SQRT1_2` is there to retain the quad size (Pythagorean stuffs). */
const t_fvec2 d = {
.x = t.x * sprite.rect.w * (float)M_SQRT1_2,
.y = t.y * sprite.rect.h * (float)M_SQRT1_2,
};
const t_fvec2 c = frect_center(sprite.rect);
/* upper-left */
const t_fvec2 v0 = { c.x - d.x, c.y - d.y };
/* bottom-left */
const t_fvec2 v1 = { c.x - d.y, c.y + d.x };
/* bottom-right */
const t_fvec2 v2 = { c.x + d.x, c.y + d.y };
/* upper-right */
const t_fvec2 v3 = { c.x + d.y, c.y - d.x };
```

View File

@@ -0,0 +1,58 @@
Title: Testing Firefox Wasm Tail Call
Brief: Or why assumptions are not always correct.
Date: 1708076705
Tags: Optimization, Wasm, Interpreters
CSS: /style.css
### Lore ###
Interpreting comes at a cost, the more you nest - the more complex things become.
It's especially true on the web, where any user program already sits on layers and layers
of interfaces. It gets pretty funny, I can't even run ZX Spectrum emulator written in JavaScript with more than a few frames a second.
A lot of software targeting the web has their own languages and interpreters (such as Godot and GDScript) and in realtime simulation intensive cases overheads do matter.
One of the things that is often suggested for solving interpreter performance is `tail calling`.
And it works empirically on native platforms. [Check this post](https://mort.coffee/home/fast-interpreters/).
And so I wondered, could it work for Wasm platform? Firefox recently [pushed support](https://bugzilla.mozilla.org/show_bug.cgi?id=1846789) for [experimental spec](https://github.com/WebAssembly/tail-call/blob/main/proposals/tail-call/Overview.md) of it, after all.
### Results ###
I based the test interpreter on `fast-interpreters` post linked above.
Sources are available on [github](https://github.com/quantumedbox/wasm-tail-call-interpreter-benchmark). It does nothing but increment a counter up to 100000000,
which is a relevant case for nothing but instruction decoding, which is what we are testing here.
First, native:
```
time ./jump-table
real 0m3,094s
user 0m3,082s
sys 0m0,012s
time ./tail-call
real 0m2,491s
user 0m2,485s
sys 0m0,005s
```
Run time decrease of `19.3%`! Formidable.
But with web it's more interesting:
```
tail-call.wasm (cold): 10874ms - timer ended
jump-table.wasm (cold): 6610ms - timer ended
```
Tail calls are actually slower in this case (by `39.2%`), and I'm not sure why yet.
Intuition proved wrong, - but me testing it first proved useful :)
Note: I'm running it on amd64 cpu, stable Firefox 122.0, compiled with Zig's Clang version 16.
Seems like JIT compilation on the web is the way to go, to fold everything to Wasm bytecode.
But overall with plain jump-table overheads are *mere* 113.6%, which I would say isn't critical for a lot of cases, especially if interpreter is intended mostly as an interface adapter, which is the case with GDScript.

BIN
articles/hand-opt-simplex-2d/.static/noise.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,116 @@
Title: Hand Optimized Simplex 2D
Brief: Results of messing around with moving and hoisting stuff around.
Date: 1688995095
Tags: Programming, GLSL, OpenGL, Optimization
CSS: /style.css
![](/articles/hand-opt-simplex-2d/noise.png)
Based on [webgl-noise repository](https://github.com/ashima/webgl-noise), which is based on [this paper](https://arxiv.org/pdf/1204.1461.pdf).
Things tried:
* Rearranging operations to reduce register pressure.
* Calculating things as soon as possible.
* Hand inlining.
### Results ###
For testing screen space *1024x1024* texture is generated, resulting in *1048576* fragment invocations,
with 4 octave fractal brownian motion.
Hardware: `Mobile Intel® GM45 Express Chipset`
Driver: `DRI Mesa 21.2.6`
Original:
```
Benchmark Iterations Min(ns) Max(ns) Variance Mean(ns)
----------------------------------------------------------------
full(0) 100 124848395 510494575 1473830605484805 129237053
```
Hand optimized:
```
Benchmark Iterations Min(ns) Max(ns) Variance Mean(ns)
----------------------------------------------------------------
full(0) 100 119354512 731397135 3705581696928414 125714847
```
Mean difference is `3ms 522µs 206ns (-2.7%)`, min difference is `5ms 493µs 883ns (-4.4%)`
This suggests that given driver is suboptimal in its optimizing capabilities,
and I imagine there might be GLSL compilers a lot worse than this.
Some intermediate shader representation comes to mind as a means for
automatic GLSL source level, profile guided and other optimizations;
as well as polyfilling to different extensions, profiles and APIs. But welp.
### Source ###
```glsl
#version 120
// Author : Ian McEwan, Ashima Arts.
// Maintainer : stegu
// Lastmod : 20110822 (ijm)
// License : Copyright (C) 2011 Ashima Arts. All rights reserved.
// Distributed under the MIT License. See LICENSE file.
// https://github.com/ashima/webgl-noise
// https://github.com/stegu/webgl-noise
//
#define MOD289(p_x) ((p_x) - floor((p_x) * (1.0 / 289.0)) * 289.0)
#define PERMUTE(p_result, p_x) { vec3 _temp = (((p_x) * 34.0) + 10.0) * (p_x); p_result = MOD289(_temp); }
// Evaluates 2D simplex noise at `v`.
// The three simplex corners are handled together in vec3/vec4 registers, and
// the permutation polynomial is applied through the PERMUTE macro.
float simplex_noise_2d(in vec2 v) {
const vec4 C = vec4(0.211324865405187, // (3.0-sqrt(3.0))/6.0
0.366025403784439, // 0.5*(sqrt(3.0)-1.0)
-0.577350269189626, // -1.0 + 2.0 * C.x
0.024390243902439); // 1.0 / 41.0
// First corner
vec2 i = floor(v + dot(v, C.yy));
vec2 x0 = v - i + dot(i, C.xx);
i = MOD289(i); // Avoid truncation effects in permutation
// i1.x = step( x0.y, x0.x ); // x0.x > x0.y ? 1.0 : 0.0
// i1.y = 1.0 - i1.x;
vec2 i1 = (x0.x > x0.y) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);
// Other corners
// x0 = x0 - 0.0 + 0.0 * C.xx ;
// x1 = x0 - i1 + 1.0 * C.xx ;
// x2 = x0 - 1.0 + 2.0 * C.xx ;
// x12.xy holds x1 and x12.zw holds x2 from the formulas above.
vec4 x12 = x0.xyxy + C.xxzz - vec4(i1.xy, 0.0, 0.0);
// Permutations
// Two rounds: first over the y lattice coordinates, then with the x coordinates added in.
vec3 pp;
vec3 p = i.y + vec3(0.0, i1.y, 1.0);
PERMUTE(pp, p);
pp += i.x + vec3(0.0, i1.x, 1.0);
PERMUTE(p, pp);
p = fract(p * C.www);
// Per-corner falloff: 0.5 minus the squared distance to the corner, clamped at zero.
vec3 m = max(0.5 - vec3(dot(x0, x0), dot(x12.xy, x12.xy), dot(x12.zw, x12.zw)), 0.0);
// Gradients: 41 points uniformly over a line, mapped onto a diamond.
// The ring size 17*17 = 289 is close to a multiple of 41 (41*7 = 287)
vec3 x = 2.0 * p - 1.0;
vec3 a0 = x - floor(x + 0.5);
vec3 h = abs(x) - 0.5;
// Raise the falloff to the fourth power.
m = m * m;
m = m * m;
// Normalise gradients implicitly by scaling m
// Approximation of: m *= inversesqrt( a0*a0 + h*h );
m *= 1.79284291400159 - 0.85373472095314 * (a0 * a0 + h * h);
// Compute final noise value at P
return 130.0 * dot(m, vec3(a0.x * x0.x + h.x * x0.y, a0.yz * x12.xz + h.yz * x12.yw));
}
```
### Possibilities
[NVidia's TEGRA guide](https://docs.nvidia.com/drive/drive_os_5.1.6.1L/nvvib_docs/DRIVE_OS_Linux_SDK_Development_Guide/baggage/tegra_gles2_performance.pdf) states that uniform access is often better than constants.
On our hardware it only degrades performance, but there's a possibility of other chipsets having preferences similar to TEGRA's.
The `C` constant is eligible for this.

View File

@@ -0,0 +1,286 @@
//! Based on: https://www.cs.umd.edu/class/spring2020/cmsc754/Lects/lect13-delaun-alg.pdf
//! Optimizations involved:
//! - Cached neighbors for traversal.
//! - Minimal memory footprint.
//! - Cached circumferences.
//! - No circumference calculations for new subdivisions, - circumferences of neighbors are used instead.
//! - Lazy circumference calculation, as some places might not be neighboring new subdivisions.
//! - Extensive use of vectorization.
//! - Care given to linear access of memory.
// todo: This method allows zero area triangles, we need to eliminate them.
// Points that lie on edges can be detected in pointRelation function by == 0 comparison.
const std = @import("std");
// Could be redefined as pleased, but i consider these to be most sensical for given implementation.
pub const VertexComponent = f32;
pub const Vertex = @Vector(2, VertexComponent);
pub const Index = u15;
pub const Area = GenericArea(VertexComponent);
pub const Builder = struct {
triangles: std.ArrayList(Triangle),
vertices: std.ArrayList(Vertex),
allocator: std.mem.Allocator,
// todo: init with expected amount of points to preallocate beforehand.
/// Creates a builder whose initial mesh consists of the corners of `area`
/// joined into two triangles that are each other's neighbor.
/// Returns an allocation error if the initial storage cannot be reserved.
pub fn init(allocator: std.mem.Allocator, area: Area) !@This() {
    // Room for the whole initial mesh is reserved up front, so none of the
    // appends below can fail or reallocate.
    var triangles = try std.ArrayList(Triangle).initCapacity(allocator, 2);
    errdefer triangles.deinit();
    var vertices = try std.ArrayList(Vertex).initCapacity(allocator, 4);
    errdefer vertices.deinit();

    // assumes area.corners() yields at most four vertices — TODO confirm
    for (area.corners()) |corner|
        vertices.appendAssumeCapacity(corner);

    triangles.appendAssumeCapacity(Triangle{
        .points = [3]Index{ 0, 2, 1 },
        .neighbors = [3]?Index{ null, 1, null },
    });
    triangles.appendAssumeCapacity(Triangle{
        .points = [3]Index{ 3, 1, 2 },
        .neighbors = [3]?Index{ null, 0, null },
    });

    return .{
        .triangles = triangles,
        .vertices = vertices,
        .allocator = allocator,
    };
}
pub fn insertAtRandom(self: *@This(), point: Vertex, generator: std.rand.Random) !void {
// Find a triangle the point lies starting from some random triangle.
var abc_index: Index = @intCast(generator.int(Index) % self.triangles.items.len);
var abc = &self.triangles.items[abc_index];
var relation = abc.pointRelation(self.vertices, point);
while (relation != .contained) {
abc_index = abc.neighbors[@intCast(@intFromEnum(relation))].?;
abc = &self.triangles.items[abc_index];
relation = abc.pointRelation(self.vertices, point);
}
// Allocate two new triangles, as well as new vertex.
const new_vertex_index: Index = @intCast(self.vertices.items.len);
try self.vertices.append(point);
const pbc_index: Index = @intCast(self.triangles.items.len);
const apc_index: Index = @intCast(self.triangles.items.len + 1);
try self.triangles.ensureUnusedCapacity(2);
// Divide the abc triangle into three.
abc = &self.triangles.items[abc_index];
// Insert pbc.
self.triangles.append(Triangle{
.points = [3]Index{ new_vertex_index, abc.points[1], abc.points[2] },
.neighbors = [3]?Index{ abc_index, abc.neighbors[1], apc_index },
}) catch unreachable;
// Insert apc.
self.triangles.append(Triangle{
.points = [3]Index{ abc.points[0], new_vertex_index, abc.points[2] },
.neighbors = [3]?Index{ abc_index, pbc_index, abc.neighbors[2] },
}) catch unreachable;
// Update neighbors to be aware of new triangles.
inline for (abc.neighbors[1..], [2]Index{ pbc_index, apc_index }) |n, e|
if (n) |i| {
const p = &self.triangles.items[i];
p.neighbors[p.neighborPosition(abc_index)] = e;
};
// Existing abc is reused.
abc.points[2] = new_vertex_index;
abc.neighbors[1] = pbc_index;
abc.neighbors[2] = apc_index;
abc.circumference = null;
// Recursively adjust edges of triangles so that circumferences are only encasing 3 points at a time.
// todo: Try inlining initial calls via @call(.always_inline, ...).
self.trySwapping(abc_index, 0);
self.trySwapping(pbc_index, 1);
self.trySwapping(apc_index, 2);
}
fn trySwapping(self: @This(), triangle_index: Index, edge: u2) void {
// First find opposite to edge point that lies in neighbor.
const triangle = &self.triangles.items[triangle_index];
const neighbor_index = triangle.neighbors[edge];
if (neighbor_index == null)
return;
const neighbor = &self.triangles.items[neighbor_index.?];
if (neighbor.circumference == null)
neighbor.circumference = Triangle.Circumference.init(neighbor.*, self.vertices);
// Position of neighbor's point opposide to shared with triangle edge.
const point_order = neighbor.nextAfter(triangle.points[edge]);
const point_index = neighbor.points[point_order];
const prev_edge = if (edge == 0) 2 else edge - 1;
if (neighbor.doesFailIncircleTest(self.vertices.items[triangle.points[prev_edge]])) {
// Incircle test failed, swap edges of two triangles and then try swapping newly swapped ones.
const next_edge = (edge + 1) % 3;
const next_point_order = (point_order + 1) % 3;
const prev_point_order = if (point_order == 0) 2 else point_order - 1;
// Update neighbors of triangles in which edge was swapped.
if (triangle.neighbors[next_edge]) |i| {
const n = &self.triangles.items[i];
n.neighbors[n.neighborPosition(triangle_index)] = neighbor_index.?;
}
if (neighbor.neighbors[prev_point_order]) |i| {
const n = &self.triangles.items[i];
n.neighbors[n.neighborPosition(neighbor_index.?)] = triangle_index;
}
const neighbor_prev_point_order_neighbor_index_cache = neighbor.neighbors[prev_point_order];
neighbor.points[prev_point_order] = triangle.points[prev_edge];
neighbor.neighbors[next_point_order] = triangle.neighbors[next_edge];
neighbor.neighbors[prev_point_order] = triangle_index;
neighbor.circumference = null;
triangle.points[next_edge] = point_index;
triangle.neighbors[next_edge] = neighbor_index.?;
triangle.neighbors[edge] = neighbor_prev_point_order_neighbor_index_cache;
triangle.circumference = null;
self.trySwapping(triangle_index, edge);
self.trySwapping(neighbor_index.?, point_order);
}
}
};
// A single mesh triangle, stored as indices into the vertex and triangle lists.
const Triangle = struct {
// References to vertices it's composed of, named abc, in CCW orientation.
points: [3]Index,
// References to triangles that are on other side of any edge, if any.
// Order is: ab, bc, ca
neighbors: [3]?Index,
// Lazily calculated and cached for incircle tests.
// Must be reset to null whenever `points` change.
circumference: ?Circumference = null,
// Circumscribed circle of a triangle: its center and squared radius.
pub const Circumference = struct {
center: Vertex,
radius_squared: VertexComponent, // todo: Way to get a type capable of holding squared values.
// Computes the circumcircle of `triangle`, both coordinates of the center at once.
// NOTE(review): the divisor is twice the signed area of the triangle, so a zero area
// triangle divides by zero here; with float components that presumably gives inf/nan.
pub fn init(triangle: Triangle, vertices: std.ArrayList(Vertex)) @This() {
const a = vertices.items[triangle.points[0]];
const b = vertices.items[triangle.points[1]];
const c = vertices.items[triangle.points[2]];
// Despite the names, these are squared magnitudes of a, b and c, broadcast to both lanes.
const ab: Vertex = @splat(magnitudeSquared(a));
const cd: Vertex = @splat(magnitudeSquared(b));
const ef: Vertex = @splat(magnitudeSquared(c));
// Edge differences with x and y swapped, so that lane 0 works on y deltas and lane 1 on x deltas.
const cmb = @shuffle(VertexComponent, c - b, undefined, [2]i32{ 1, 0 });
const amc = @shuffle(VertexComponent, a - c, undefined, [2]i32{ 1, 0 });
const bma = @shuffle(VertexComponent, b - a, undefined, [2]i32{ 1, 0 });
// Lane-wise circumcenter: sum of |v|^2 * delta over sum of v * delta, halved.
const center = ((ab * cmb + cd * amc + ef * bma) / (a * cmb + b * amc + c * bma)) / @as(Vertex, @splat(2));
return .{
.center = center,
// Any of the three points would do, all lie on the circle.
.radius_squared = magnitudeSquared(a - center),
};
}
};
// Tells whether `point` is inside the triangle, or otherwise beyond which edge it lies.
// Values 0..2 of the result match positions in `neighbors`, so the result can be used
// to pick the neighbor to continue a search in.
// When the point is beyond several edges the first one in ab, bc, ca order is reported.
// A point exactly on an edge counts as contained.
// todo: Try perpendicular dot product approach.
pub fn pointRelation(self: @This(), vertices: std.ArrayList(Vertex), point: Vertex) enum(u2) {
outside_ab = 0,
outside_bc = 1,
outside_ca = 2,
contained = 3,
} {
const a = vertices.items[self.points[0]];
const b = vertices.items[self.points[1]];
const c = vertices.items[self.points[2]];
// https://stackoverflow.com/questions/1560492/how-to-tell-whether-a-point-is-to-the-right-or-left-side-of-a-line
const p = point;
// Calculate cross products for all edges at once.
// Lanes 0..5 of q - w are edge vectors b-a, c-b, a-c as x, y pairs.
// Lanes 6..11 are offsets of p from a, b, c as y, x pairs (swapped on purpose).
const q = @Vector(12, VertexComponent){ b[0], b[1], c[0], c[1], a[0], a[1], p[1], p[0], p[1], p[0], p[1], p[0] };
const w = @Vector(12, VertexComponent){ a[0], a[1], b[0], b[1], c[0], c[1], a[1], a[0], b[1], b[0], c[1], c[0] };
const e = q - w;
const r = @shuffle(VertexComponent, e, undefined, [6]i32{ 0, 1, 2, 3, 4, 5 });
const t = @shuffle(VertexComponent, e, undefined, [6]i32{ 6, 7, 8, 9, 10, 11 });
// Per edge: lane 2k is edge.x * offset.y, lane 2k+1 is edge.y * offset.x.
const y = r * t;
const u = @shuffle(VertexComponent, y, undefined, [3]i32{ 0, 2, 4 });
const i = @shuffle(VertexComponent, y, undefined, [3]i32{ 1, 3, 5 });
// u - i is the cross product of edge and offset; strictly positive means beyond that edge.
const o = (u - i) > @Vector(3, VertexComponent){ 0, 0, 0 };
// const o = (u - i) <= @Vector(3, VertexComponent){ 0, 0, 0 };
// if (@reduce(.And, o))
// return .contained
// else if (!o[0])
// return .outside_ab
// else if (!o[1])
// return .outside_bc
// else
// return .outside_ca;
// Branchless form of the chain above: ab goes to the highest bit, so the count of
// leading zeroes is the first failing edge, or 3 (contained) when no bit is set.
const mask = @as(u3, @intFromBool(o[0])) << 2 | @as(u3, @intFromBool(o[1])) << 1 | @as(u3, @intFromBool(o[2]));
return @enumFromInt(@clz(mask));
}
// True when `point` is strictly inside the cached circumcircle.
// `circumference` must be computed before the call, as it is unwrapped unconditionally.
pub inline fn doesFailIncircleTest(self: @This(), point: Vertex) bool {
return magnitudeSquared(self.circumference.?.center - point) < self.circumference.?.radius_squared;
}
// Squared length of `p` taken as a vector from the origin.
// todo: Shouldn't be here.
pub inline fn magnitudeSquared(p: Vertex) VertexComponent {
return @reduce(.Add, p * p);
}
// Finds which point comes after given one, by index, CCW.
// Used to translate point names when traveling between neighbors.
// Returns a position in `points`; the given vertex must be one of this triangle's points.
pub inline fn nextAfter(self: @This(), point_index: Index) u2 {
inline for (self.points, 0..) |p, i|
if (point_index == p)
return @intCast((i + 1) % 3);
unreachable;
}
// Finds the position in `neighbors` that refers to the given triangle.
// The given triangle must be a neighbor of this one.
pub inline fn neighborPosition(self: @This(), triangle_index: Index) usize {
inline for (self.neighbors, 0..) |n, i|
if (triangle_index == n)
return i;
unreachable;
}
};
// Axis-aligned rectangular area over component type `T`.
pub fn GenericArea(comptime T: type) type {
    return struct {
        // Two points packed as x0, y0, x1, y1.
        // note: Upper-left origin is assumed; the second point must not lie left of or above the first.
        xyxy: @Vector(4, T),

        /// Order: Upperleft, upperright, bottomleft, bottomright.
        pub fn corners(self: @This()) [4]@Vector(2, T) {
            const Corner = @Vector(2, T);
            const left = self.xyxy[0];
            const top = self.xyxy[1];
            const right = self.xyxy[2];
            const bottom = self.xyxy[3];
            return [4]Corner{
                Corner{ left, top },
                Corner{ right, top },
                Corner{ left, bottom },
                Corner{ right, bottom },
            };
        }
    };
}

BIN
articles/incremental-delaunay/.static/web.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,35 @@
Title: Optimized Incremental Delaunay
Brief: Classic triangulation algorithm to use for one by one insertion of points, with SIMD and caching.
Date: 1694711563
Tags: Programming, Zig, Generation
CSS: /style.css
![](/articles/incremental-delaunay/web.png)
Based on [this paper](https://www.cs.umd.edu/class/spring2020/cmsc754/Lects/lect13-delaun-alg.pdf)
Full usable isolated code is [here](/articles/incremental-delaunay/incremental-delaunay.zig.txt).
### Usage example ###
```zig
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer std.debug.assert(gpa.deinit() == .ok);

// Area is a struct wrapping a 4-component vector: x0, y0, x1, y1.
var triangulator = try Delaunay.Builder.init(gpa.allocator(), Delaunay.Area{ .xyxy = .{ -1, -1, 1, 1 } });
// Runs before the leak check above, as defers execute in reverse order.
defer {
    triangulator.triangles.deinit();
    triangulator.vertices.deinit();
}

const point_count = 128;

var prng = std.rand.DefaultPrng.init(123123);
const rng = prng.random();

for (0..point_count) |_| {
    // Points must stay within the area the builder was initialized with.
    const x = rng.float(f32) * 2 - 1;
    const y = rng.float(f32) * 2 - 1;
    try triangulator.insertAtRandom(Delaunay.Vertex{ x, y }, rng);
}

// Two initial triangles, plus two more for every inserted point.
var triangles: [point_count * 2 + 2]gfx.triangle.ScreenspaceTriangle = undefined;
for (&triangles, triangulator.triangles.items) |*out, in| {
    out.a = triangulator.vertices.items[in.points[0]];
    out.b = triangulator.vertices.items[in.points[1]];
    out.c = triangulator.vertices.items[in.points[2]];
}
```

View File

@@ -0,0 +1,108 @@
Title: Links of the Open World Programmer
Brief: Various links to articles and technical documentation I've found along the way of making my engines, mostly centered around OpenGL.
Date: 1714393506
Tags: Programming, OpenGL, GLSL, Generation
CSS: /style.css
## optimization articles
- [Various instancing hacks and methods, with benchmarks](https://solhsa.com/instancing.html)
- [Buffered memory can have drastic differences in read speed](https://community.intel.com/t5/Developing-Games-on-Intel/glMapBuffer-reading-mapped-memory-is-very-slow/td-p/1059726)
- [Selecting device is problematic with opengl, but there are some hacky ways](https://stackoverflow.com/questions/68469954/how-to-choose-specific-gpu-when-create-opengl-context)
- [For linux environments it's a lot messier](https://bbs.archlinux.org/viewtopic.php?id=266456)
- [About vertex caching in instanced draw](http://eelpi.gotdns.org/papers/fast_vert_cache_opt.html)
- [Two level indexing](https://stackoverflow.com/questions/11148567/rendering-meshes-with-multiple-indices)
- [OpenGL Insights](https://openglinsights.com/)
- [Da Pipeline](https://fgiesen.wordpress.com/2011/07/09/a-trip-through-the-graphics-pipeline-2011-index/)
- [Apple guide on GLES](https://developer.apple.com/library/archive/documentation/3DDrawing/Conceptual/OpenGLES_ProgrammingGuide/Introduction/Introduction.html)
- [About GPU cache](https://www.rastergrid.com/blog/gpu-tech/2021/01/understanding-gpu-caches/)
- [Nvidia guide](https://docs.nvidia.com/drive/drive_os_5.1.6.1L/nvvib_docs/index.html#page/DRIVE_OS_Linux_SDK_Development_Guide/Graphics/graphics_opengl.html)
- [Optimizing Triangle Strips for Fast Rendering](http://www.cs.umd.edu/gvil/papers/av_ts.pdf)
- [TEGRA specific GLES2 guide](https://docs.nvidia.com/drive/drive_os_5.1.6.1L/nvvib_docs/DRIVE_OS_Linux_SDK_Development_Guide/baggage/tegra_gles2_performance.pdf)
- [List of old NVidia GLSL pragmas](http://www.icare3d.org/news_articles/nvidia_glsl_compiling_options.html)
- [Radeon 9XXX series optimization guide](https://people.freedesktop.org/~mareko/radeon-9700-opengl-programming-and-optimization-guide.pdf)
- [GLSL optimizations](https://www.khronos.org/opengl/wiki/GLSL_Optimizations)
- [IPhone 3D Programming book](https://www.oreilly.com/library/view/iphone-3d-programming/9781449388133/bk01-toc.html)
- [Article about cache utilization tips and techniques](https://johnnysswlab.com/make-your-programs-run-faster-by-better-using-the-data-cache/)
- [Pixel Buffer Object](http://www.songho.ca/opengl/gl_pbo.html)
- [Screen quads](https://stackoverflow.com/questions/2588875/whats-the-best-way-to-draw-a-fullscreen-quad-in-opengl-3-2)
- [Performance problems with framebuffer swaps](https://stackoverflow.com/questions/10729352/framebuffer-fbo-render-to-texture-is-very-slow-using-opengl-es-2-0-on-android)
- [Matrices inside textures](https://stackoverflow.com/questions/29672810/efficient-way-to-manage-matrices-within-a-graphic-application-using-texture-buff)
- [OpenGL Insights - Asynchronous Buffer Transfers](https://zbook.org/read/448e5c_opengl-insights-university-of-pennsylvania.html)
- [Scene rendering techniques presentation](https://on-demand.gputechconf.com/gtc/2014/presentations/S4379-opengl-44-scene-rendering-techniques.pdf)
- [High-performance extension galore](http://behindthepixels.io/assets/files/High-performance,%20Low-Overhead%20Rendering%20with%20OpenGL%20and%20Vulkan%20-%20Edward%20Liu.pdf)
- [NVidia example on shader based occlusion culling](https://github.com/nvpro-samples/gl_occlusion_culling)
- [Packing](http://smt565.blogspot.com/2011/04/bit-packing-depth-and-normals.html)
- [Thread on texture DMA when transferring](https://community.khronos.org/t/texture-performance/49104)
- [More recent DMA thread](https://community.khronos.org/t/direct-memory-access-in-opengl/108312/22)
- [Mesa driven GLSL optimizer, might be relevant for devices with poor optimizing compilers](https://github.com/aras-p/glsl-optimizer)
- [In-depth OpenGL feature overview with hardware support listed](https://www.g-truc.net/doc/Effective%20OpenGL.pdf)
- [Z-order curve to increase locality of multidimensional data](https://en.wikipedia.org/wiki/Z-order_curve#Texture_mapping)
- [Thread group locality](https://developer.nvidia.com/blog/optimizing-compute-shaders-for-l2-locality-using-thread-group-id-swizzling/)
- [Shader-db to prove instruction counts](https://blogs.igalia.com/apinheiro/2015/09/optimizing-shader-assembly-instruction-on-mesa-using-shader-db/)
- [Texture cache](https://computergraphics.stackexchange.com/questions/357/is-using-many-texture-maps-bad-for-caching)
- [Hierarchical Z map occlusion culling](https://www.rastergrid.com/blog/2010/10/hierarchical-z-map-based-occlusion-culling/)
- [Reducing driver overhead](https://gdcvault.com/play/1020791/)
- [Persistent mapping](https://www.khronos.org/opengl/wiki/Buffer_Object#Persistent_mapping)
- [Post transform cache friendly way of rendering regular grids](http://www.ludicon.com/castano/blog/2009/02/optimal-grid-rendering/)
- [Discussion on above](https://community.khronos.org/t/optimize-grid-rendering-for-post-t-l-cache/72272/9)
- [Vertex optimization on modern GPUs](https://www.tugraz.at/fileadmin/user_upload/Institute/ICG/Images/team_steinberger/Pipelines/HPG-2018_shading_rate-authorversion.opt.pdf)
- [General SIMD usage and techniques](https://repository.dl.itc.u-tokyo.ac.jp/record/48871/files/A32992.pdf)
- [Fragment friendly circle meshing](http://www.humus.name/index.php?page=News&ID=228)
- [Occlusion culling for terrain](https://www.researchgate.net/publication/248358913_Voxel_Column_Culling_Occlusion_Culling_For_Large_Terrain_Models)
- [Billboard quad transformation optimization](https://gamedev.stackexchange.com/questions/201963/efficient-calculation-of-billboard-sprite-transformations)
- [NVidia bindless extensions](https://developer.download.nvidia.com/opengl/tutorials/bindless_graphics.pdf)
- [hacksoflife blog, full of good things](http://hacksoflife.blogspot.com/search/label/OpenGL)
## technical stuff
- [Determinism between opengl vendors](https://stackoverflow.com/questions/7922526/opengl-deterministic-rendering-between-gpu-vendor)
- [Early fragment test, my beloved](https://www.khronos.org/opengl/wiki/Early_Fragment_Test)
- [Shape digitalization](https://tug.org/docs/hobby/hobby-thesis.pdf)
- [Line and circle rasterization](http://www.sunshine2k.de/coding/java/Bresenham/RasterisingLinesCircles.pdf)
- [Occlusion culling of Vintage Story](https://github.com/tyronx/occlusionculling)
- [Minecraft work on cave occlusion, in 2 parts](https://tomcc.github.io/2014/08/31/visibility-1.html)
- [Order independent blending technique](https://jcgt.org/published/0002/02/09/)
- [High performance voxel engine](https://nickmcd.me/2021/04/04/high-performance-voxel-engine/)
- [Monotone meshing](https://blackflux.wordpress.com/tag/monotone-meshing/)
- [Capsule collision detection](https://wickedengine.net/2020/04/26/capsule-collision-detection/)
- [Forsyth vertex cache optimization](https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html)
- [Depth buffer based lighting](https://www.researchgate.net/publication/320616607_Eye-Dome_Lighting_a_non-photorealistic_shading_technique)
- [Computational Geometry in C (Second Edition)](http://www.science.smith.edu/~jorourke/books/compgeom.html)
- [OpenGL FAQ](https://www.opengl.org/archives/resources/faq/technical/)
- [SGI BSP FAQ](https://web.archive.org/web/20010614072959/http://reality.sgi.com/bspfaq/)
## generational stuff
- [Domain warping](https://iquilezles.org/articles/warp/)
- [Portable and fast Perlin noise in legacy GLSL](https://arxiv.org/abs/1204.1461)
- [Evaluation of GPU noise hashing solutions](https://jcgt.org/published/0009/03/02/paper.pdf)
- [SHISHUA](https://espadrine.github.io/blog/posts/shishua-the-fastest-prng-in-the-world.html)
- [Generalized lattice noise](https://www.codeproject.com/Articles/785084/A-generic-lattice-noise-algorithm-an-evolution-of)
- [Procedural hydrology](https://nickmcd.me/2020/04/15/procedural-hydrology/)
- [Tectonics](https://nickmcd.me/2020/12/03/clustered-convection-for-simulating-plate-tectonics/)
- [Approximation of heightmaps](https://www.cs.cmu.edu/~garland/scape/scape.pdf)
## notable extensions
- [Vertex array locking](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_compiled_vertex_array.txt)
- [Packed pixels](https://people.freedesktop.org/~marcheu/extensions/EXT/packed_pixels.html)
- [Framebuffer fetch](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_shader_framebuffer_fetch.txt)
- [Integer textures](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_integer.txt)
- [Texture swizzle](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_swizzle.txt)
- [Shader binaries](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_get_program_binary.txt)
- [Internal format query](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_internalformat_query2.txt)
- [Direct state access](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_direct_state_access.txt)
- [Texture view](https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_view.txt)
- [No error](https://registry.khronos.org/OpenGL/extensions/KHR/KHR_no_error.txt)
- [Trinary min and max](https://registry.khronos.org/OpenGL/extensions/AMD/AMD_shader_trinary_minmax.txt)
- [NV occlusion query, with partitioning, without locking, potentially with less overdraw](https://registry.khronos.org/OpenGL/extensions/NV/NV_conditional_render.txt)
- [ES2 compatibility, can be used to query precision of floats](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_ES2_compatibility.txt)
- [Pipeline stats](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_pipeline_statistics_query.txt)
- [Parallel shader compile](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_parallel_shader_compile.txt)
- [Shader inter group communication](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_shader_ballot.txt)
- [Granular buffer memory control](https://registry.khronos.org/OpenGL/extensions/ARB/ARB_sparse_buffer.txt)
- [Window pos](https://people.freedesktop.org/~marcheu/extensions/ARB/window_pos.html)
- [No perspective interpolation for screen aligned geometry](https://registry.khronos.org/OpenGL/extensions/NV/NV_shader_noperspective_interpolation.txt)
## data representations
- [Efficient varying-length integers](https://john-millikin.com/vu128-efficient-variable-length-integers)
- [Awesome article on hashtables](https://thenumb.at/Hashtables/)
- [Crit-bit trees](https://cr.yp.to/critbit.html)
- [QP tries](https://dotat.at/prog/qp/README.html)

View File

@@ -0,0 +1,31 @@
Title: Cached Neocities Uploads
Brief: Making uploading of directories to Neocities less painful.
Date: 1707585916
Tags: Programming, Bash, Script
CSS: /style.css
Quick and dirty Bash-based sha256sum checksum solution to create stamps for later checking and rejection.
```bash
#!/usr/bin/bash

# Uploads files under ./html (up to three directory levels deep) to Neocities,
# skipping those whose sha256 stamp from a previous upload still matches.
for cur in ./html/{*,*/*,*/*/*}; do
  # Unmatched globs stay literal, hence the -f check; stamps themselves are never uploaded.
  if [ -f "$cur" ] && [[ ! "$cur" == *.upload-checksum ]]; then
    if [ -f "$cur.upload-checksum" ]; then
      c=$(sha256sum -c "$cur.upload-checksum" 2> /dev/null)
      if [[ "$c" == *OK ]]; then
        echo "$cur is up-to-date, skipping"
        continue
      fi
    fi

    # Remote directory is the file's directory relative to ./html.
    d=$(dirname "$(realpath --relative-to="./html" "$cur")")
    if [[ "$d" == "." ]]; then
      neocities upload "$cur"
    else
      neocities upload -d "$d" "$cur"
    fi || {
      # No stamp is left behind, so the file is retried on the next run.
      # NOTE(review): relies on the neocities CLI exiting non-zero on failure - confirm.
      echo "$cur failed to upload, will retry on next run" >&2
      continue
    }

    # Stamp only what actually got uploaded.
    sha256sum "$cur" > "$cur.upload-checksum"
  fi
done
```

7
articles/oscillators/make Executable file
View File

@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Builds the waveform generator and runs it to produce gifs into ./.dynamic.
# Stop at the first failure, so a stale or missing binary is never run.
set -e

# Work relative to this script, wherever it is called from.
cd "$(dirname "$0")"

mkdir -p ./.dynamic
mkdir -p ./.temp

# CC is honored when set; left unquoted so that it may carry flags.
${CC:-cc} -Wno-unused-result -Wno-incompatible-pointer-types waveforms.c ../../tools/gifenc/gifenc.c -I../../tools -O2 -o ./.temp/waveforms

./.temp/waveforms

View File

@@ -14,6 +14,8 @@ Inspirations are taken from [musicdsp](https://www.musicdsp.org/).
### Sine/Cosine ### ### Sine/Cosine ###
![](/articles/oscillators/sine.gif)
```c ```c
/* Intended to be executed offline with values then embedded in the binary. /* Intended to be executed offline with values then embedded in the binary.
* By having usage of glibc sin and cos functions strictly offline it's easier to have it freestanding * By having usage of glibc sin and cos functions strictly offline it's easier to have it freestanding
@@ -38,6 +40,8 @@ void pump_sinewave(struct sinewave *wave) {
### Square ### ### Square ###
![](/articles/oscillators/sqrt.gif)
```c ```c
/* Implemented over sinewave */ /* Implemented over sinewave */
struct sqrtwave { struct sqrtwave {
@@ -48,8 +52,14 @@ struct sqrtwave {
} v; } v;
} init_sqrtwave(float frequency, float phase, float amplitude) { } init_sqrtwave(float frequency, float phase, float amplitude) {
struct sqrtwave r; struct sqrtwave r;
union {
float f;
uint32_t u;
} v, a;
r.w = init_sinewave(frequency, phase, 1.f); r.w = init_sinewave(frequency, phase, 1.f);
r.v.f = amplitude; v.f = r.w.s;
a.f = amplitude;
r.v.u = (a.u & 0x7fffffff) | (v.u & 0x80000000);
return r; return r;
} }
@@ -68,6 +78,8 @@ void pump_sqrtwave(struct sqrtwave *wave) {
### Saw ### ### Saw ###
![](/articles/oscillators/sawt.gif)
```c ```c
struct sawtwave { struct sawtwave {
float v, a, i; float v, a, i;
@@ -75,7 +87,7 @@ struct sawtwave {
struct sawtwave r; struct sawtwave r;
r.v = sinf(phase) * amplitude; r.v = sinf(phase) * amplitude;
r.a = amplitude; r.a = amplitude;
r.i = frequency / AUDIO_FRAME_RATE * amplitude; r.i = 2.f * frequency / AUDIO_FRAME_RATE * amplitude;
return r; return r;
} }
@@ -83,7 +95,11 @@ struct sawtwave {
void pump_sawtwave(struct sawtwave *wave) { void pump_sawtwave(struct sawtwave *wave) {
wave->v += wave->i; wave->v += wave->i;
if (wave->v > wave->a) if (wave->v > wave->a)
wave->v -= wave->a; wave->v -= wave->a * 2.f;
} }
``` ```
### Edits ###
- Fixed initial value based on phase in sqrtwave, proper range for sawtwave.
- Added waveform gifs.

View File

@@ -0,0 +1,138 @@
#include <stdio.h>
#include <inttypes.h>
#include <math.h>
#include <assert.h>
#include <string.h>
#include "gifenc/gifenc.h"
/* Frequency of the plotted note, in Hz. */
#define NOTE 440
/* Samples per second. */
#define AUDIO_FRAME_RATE 44100
/* Samples in one period of NOTE (integer division). */
#define LENGTH (AUDIO_FRAME_RATE / NOTE)
/* Image width in pixels: four periods, one pixel per sample.
 * Parenthesized so the macro stays a single operand in any expression. */
#define WIDTH (LENGTH * 4)
/* Image height in pixels. */
#define HEIGHT 128

/* Color of palette entry 1, the one the waveform is drawn with. */
#define RED 0
#define GREEN 0
#define BLUE 255
/* Recursive sine oscillator state.
 * f is the per-sample rotation coefficient, s and c are the current sine and cosine values. */
static struct sinewave {
    float f, s, c;
} init_sinewave(float frequency, float phase, float amplitude) {
    const float half_angle = (float)M_PI * frequency / (float)AUDIO_FRAME_RATE;
    struct sinewave r = {
        .f = 2.f * sinf(half_angle),
        .s = amplitude * sinf(phase),
        .c = amplitude * cosf(phase),
    };
    return r;
}
/* Advances the oscillator by one sample and returns the new sine value.
 * The cosine update deliberately uses the already updated sine. */
static float pump_sinewave(struct sinewave *wave) {
    const float coefficient = wave->f;

    wave->s = wave->s - coefficient * wave->c;
    wave->c = wave->c + coefficient * wave->s;

    return wave->s;
}
/* Square wave implemented over sinewave: a unit sine supplies the sign,
 * v holds the amplitude with that sign applied. */
static struct sqrtwave {
    struct sinewave w;
    union {
        float f;
        uint32_t u;
    } v;
} init_sqrtwave(float frequency, float phase, float amplitude) {
    union {
        float f;
        uint32_t u;
    } sine_bits, amplitude_bits;
    struct sqrtwave r;

    r.w = init_sinewave(frequency, phase, 1.f);

    sine_bits.f = r.w.s;
    amplitude_bits.f = amplitude;

    /* Magnitude bits of the amplitude combined with the sign bit of the initial sine. */
    const uint32_t magnitude = amplitude_bits.u & 0x7fffffff;
    const uint32_t sign = sine_bits.u & 0x80000000;
    r.v.u = magnitude | sign;

    return r;
}
/* Advances the underlying sine by one sample, copies its sign bit onto the
 * stored amplitude and returns the result. */
static float pump_sqrtwave(struct sqrtwave *wave) {
    union {
        float f;
        uint32_t u;
    } sine_bits;

    pump_sinewave(&wave->w);
    sine_bits.f = wave->w.s;

    const uint32_t magnitude = wave->v.u & 0x7fffffff;
    const uint32_t sign = sine_bits.u & 0x80000000;
    wave->v.u = magnitude | sign;

    return wave->v.f;
}
/* Sawtooth oscillator state: v is the current value, a the amplitude,
 * i the per-sample increment covering the [-a, a] range once per period. */
static struct sawtwave {
    float v, a, i;
} init_sawtwave(float frequency, float phase, float amplitude) {
    struct sawtwave r = {
        .v = sinf(phase) * amplitude,
        .a = amplitude,
        .i = 2.f * frequency / AUDIO_FRAME_RATE * amplitude,
    };
    return r;
}
/* Advances the sawtooth by one sample, wrapping down by a full range
 * once the amplitude is exceeded, and returns the new value. */
static float pump_sawtwave(struct sawtwave *wave) {
    float next = wave->v + wave->i;

    if (next > wave->a)
        next -= wave->a * 2.f;

    wave->v = next;
    return next;
}
/* Absolute value of an integer. */
static int absi(int v) {
    if (v > 0)
        return v;
    return -v;
}
/* Marks pixels of a WIDTH-wide frame with palette index 1, starting at (x, y)
 * and stepping towards (xt, yt); the end point itself is not plotted.
 *
 * Per-step deltas come from integer division by the longer extent, so the
 * shorter axis only advances when both extents are equal: in effect this draws
 * axis-aligned or diagonal runs, which is all the one-pixel-wide column plots
 * of this file need.
 *
 * Coordinates are not bounds checked. */
static void plot_line(uint8_t *frame, int x, int xt, int y, int yt) {
    int dx = xt - x;
    int dy = yt - y;
    int step = absi(dx) >= absi(dy) ? absi(dx) : absi(dy);

    /* Start and end coincide: nothing to plot, and nothing to divide by. */
    if (step == 0)
        return;

    dx /= step;
    dy /= step;

    int xc = x;
    int yc = y;
    for (int i = 1; i <= step; ++i) {
        frame[xc + yc * WIDTH] = 1;
        xc += dx;
        yc += dy;
    }
}
/* Maps a sample to a pixel row of the image. */
static int sample_row(float sample) {
    return (int)((sample + 1.0f) * 127.5f) / 2;
}

/* Renders one period of an oscillator into a looping gif at `filepath`.
 *
 * `pumper` is called LENGTH times with `generator` to collect the samples.
 * Each of the LENGTH frames then shows the period repeated across the whole
 * width, shifted by one more sample than the previous frame. */
static void generate_wave(char const *filepath, void *generator, float (*pumper)(void *)) {
    float wave[LENGTH];
    int collected = 0;
    while (collected < LENGTH) {
        wave[collected] = pumper(generator);
        ++collected;
    }

    /* Entry 0 stays black, entry 1 is the drawing color. */
    uint8_t palette[6] = { [3] = RED, [4] = GREEN, [5] = BLUE };

    ge_GIF *g = ge_new_gif(filepath, WIDTH, HEIGHT, palette, 1, 0, 0);
    assert(g);

    for (int shift = 0; shift < LENGTH; ++shift) {
        memset(g->frame, 0, WIDTH * HEIGHT);

        for (int column = 0; column < WIDTH; ++column) {
            int l0 = sample_row(wave[(shift + column) % LENGTH]);
            int l1 = sample_row(wave[(shift + column + 1) % LENGTH]);

            if (column != WIDTH - 1)
                plot_line(g->frame, column, column + 1, l0, l1);
            else
                /* Last column has nowhere to connect to, plot a single pixel. */
                g->frame[column + l0 * WIDTH] = 1;
        }

        ge_add_frame(g, 1);
    }

    ge_close_gif(g);
}
int main(void) {
struct sinewave sine = init_sinewave((float)NOTE, 0.0f, 1.0f);
generate_wave(".dynamic/sine.gif", &sine, pump_sinewave);
struct sqrtwave sqrt = init_sqrtwave((float)NOTE, 0.0f, 1.0f);
generate_wave(".dynamic/sqrt.gif", &sqrt, pump_sqrtwave);
struct sawtwave sawt = init_sawtwave((float)NOTE, 0.0f, 1.0f);
generate_wave(".dynamic/sawt.gif", &sawt, pump_sawtwave);
return 0;
}

20
articles/roads-0/page.mmd Normal file
View File

@@ -0,0 +1,20 @@
Title: Roads-0
Brief: Brief moments.
Date: 1685039350
Tags: Photography
CSS: /style.css
### Winter 22-23 ###
![](/articles/roads-0/frosties.jpg)
![](/articles/roads-0/steppe-in-snow.jpg)
![](/articles/roads-0/picturesque.jpg)
![](/articles/roads-0/veins-overtaking.jpg)
![](/articles/roads-0/to-the-skies.jpg)
### Spring 23 ###
![](/articles/roads-0/sweet-carpet.jpg)
![](/articles/roads-0/lonesome-road.jpg)
![](/articles/roads-0/ice-breaker.jpg)
![](/articles/roads-0/over-the-river.jpg)

View File

@@ -0,0 +1,90 @@
Title: Lookup Caching by .rodata Section String Inference
Brief: Rather hacky, but working way of string key lookup acceleration.
Date: 1722127090
Tags: Programming, Optimization, C, Linux
CSS: /style.css
While working on our immediate no-state engine, the need for texture lookup optimization arose.
API is designed in a way where every single pushed triangle means resolution of texture by path.
My insane mind then came up with this optimization: detect whether the given path pointer lies in .rodata and, if so,
just look it up by a hash of the pointer itself, not of the whole varying-size string. Constant time and all that.
For that I ended up writing a limited ELF parsing routine that expects `/proc/self/exe`.
Virtual address space randomization was tricky until I realized that
`getauxval(AT_ENTRY) - ehdr.e_entry` could be used to get the base process address.
After the section bounds are known, it's as simple as checking `vm_start <= ptr && ptr < vm_end`.
### Code ###
```c
/* code is fully self-contained, feel free to use it :) */
#include <fcntl.h>
#include <unistd.h>
#include <sys/auxv.h>
#include <elf.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Reads exactly `size` bytes at absolute file offset `offset`.
 * Returns false on error, end of file or when interrupted for good. */
static bool read_exact_at(int fd, off_t offset, void *out, size_t size)
{
    char *cursor = out;
    while (size > 0) {
        ssize_t got = pread(fd, cursor, size, offset);
        if (got <= 0)
            return false;
        cursor += got;
        offset += got;
        size -= (size_t)got;
    }
    return true;
}

/* Finds where the section called `name` of the running executable is mapped.
 *
 * The executable is located through /proc/self/exe and its section headers are
 * parsed from the file; addresses are rebased by the difference between the
 * actual entry point (AT_ENTRY) and the one recorded in the ELF header.
 *
 * On success returns true and stores the [*vm_start, *vm_end) range.
 * On any failure (no procfs, not a 64bit ELF, truncated file, out of memory,
 * no such section) returns false and leaves the outputs untouched. */
bool infer_elf_section_bounds(const char *const restrict name,
                              const char **restrict vm_start,
                              const char **restrict vm_end)
{
    bool result = false;
    char *sh = NULL;

    /* One byte is kept for the terminator, readlink() may fill the whole buffer. */
    char buf[PATH_MAX];
    ssize_t l = readlink("/proc/self/exe", buf, sizeof buf - 1);
    if (l == -1)
        goto ERR_CANT_READLINK;
    buf[l] = 0; /* readlink() doesn't write a terminator */

    int elf = open(buf, O_RDONLY);
    if (elf == -1)
        goto ERR_CANT_OPEN_SELF;

    /* elf header */
    Elf64_Ehdr ehdr;
    if (!read_exact_at(elf, 0, &ehdr, sizeof ehdr))
        goto ERR_NOT_ELF;
    if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
        ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
        ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
        ehdr.e_ident[EI_MAG3] != ELFMAG3)
        goto ERR_NOT_ELF;
    /* Only the 64bit layout is parsed below. */
    if (ehdr.e_ident[EI_CLASS] != ELFCLASS64)
        goto ERR_NOT_ELF;
    /* Also rejects files without section headers. */
    if (ehdr.e_shstrndx >= ehdr.e_shnum)
        goto ERR_NOT_ELF;

    /* section header string table */
    Elf64_Shdr shstrdr;
    if (!read_exact_at(elf, (off_t)(ehdr.e_shoff + ehdr.e_shstrndx * sizeof (Elf64_Shdr)),
                       &shstrdr, sizeof shstrdr))
        goto ERR_NOT_ELF;
    if (shstrdr.sh_size == 0)
        goto ERR_NOT_ELF;

    sh = malloc(shstrdr.sh_size);
    if (sh == NULL)
        goto ERR_NOT_ELF;
    if (!read_exact_at(elf, (off_t)shstrdr.sh_offset, sh, shstrdr.sh_size))
        goto ERR_NOT_ELF;
    /* A well formed table already ends with a terminator, enforce it for strcmp()'s sake. */
    sh[shstrdr.sh_size - 1] = 0;

    /* Difference between where the executable is mapped and where it was linked to be. */
    const unsigned long base = getauxval(AT_ENTRY) - ehdr.e_entry;

    /* walk sections searching for needed name */
    for (size_t s = 0; s < ehdr.e_shnum; ++s) {
        Elf64_Shdr shdr;
        if (!read_exact_at(elf, (off_t)(ehdr.e_shoff + s * sizeof shdr), &shdr, sizeof shdr))
            break;

        /* Name offset pointing outside of the table, skip such a section. */
        if (shdr.sh_name >= shstrdr.sh_size)
            continue;

        if (strcmp(&sh[shdr.sh_name], name) == 0) {
            const char *start = (const char *)(base + shdr.sh_addr);
            *vm_start = start;
            *vm_end = start + shdr.sh_size;
            result = true;
            break;
        }
    }

ERR_NOT_ELF:
    free(sh); /* NULL when the table wasn't reached */
    close(elf);

ERR_CANT_OPEN_SELF:
ERR_CANT_READLINK:
    return result;
}
```

156
articles/simd-rect/page.mmd Normal file
View File

@@ -0,0 +1,156 @@
Title: Vectorized Axis-Aligned Rect Ops
Brief: Small detour in making rect type and operations on it in SIMD semantics.
Date: 1695570693
Tags: Programming, Zig, Optimization
CSS: /style.css
### Code ###
Zig's `@shuffle` makes it rather arcane to look at, so be prepared.
```zig
// Axis-aligned rectangle over component type T, with containment and intersection
// tests done as a single vector comparison each.
//
// Reading the masks: in @shuffle a non-negative index picks a lane of the first
// vector, a negative one picks from the second, -1 being its lane 0, -2 lane 1 and so on.
// All bounds are inclusive, as comparisons are `<=`.
pub fn RectSIMD(comptime T: type) type {
return struct {
// Two points packed as x0, y0, x1, y1; the comparisons below expect x0 <= x1 and y0 <= y1.
xyxy: @Vector(4, T),
// Whether point `p` lies within the rect.
pub fn isPointWithin(self: @This(), p: @Vector(2, T)) bool {
// q = { x0, y0, p.x, p.y }
const q = @shuffle(T, p, self.xyxy, [4]i32{ -1, -2, 0, 1 });
// w = { p.x, p.y, x1, y1 }
const w = @shuffle(T, p, self.xyxy, [4]i32{ 0, 1, -3, -4 });
return @reduce(.And, q <= w);
}
// Whether rect `a` lies entirely within this rect.
pub fn isRectWithin(self: @This(), a: @This()) bool {
// q = { a.x0, a.y0, a.x1, a.y1, x0, y0, x0, y0 }
const q = @shuffle(T, a.xyxy, self.xyxy, [8]i32{ 0, 1, 2, 3, -1, -2, -1, -2 });
// w = { x1, y1, x1, y1, a.x0, a.y0, a.x1, a.y1 }
const w = @shuffle(T, a.xyxy, self.xyxy, [8]i32{ -3, -4, -3, -4, 0, 1, 2, 3 });
// Both points of `a` are checked against both bounds of this rect.
return @reduce(.And, q <= w);
}
// Whether rect `a` and this rect overlap; touching edges count as overlap.
// todo: Handle zero area cases?
pub fn isRectIntersecting(self: @This(), a: @This()) bool {
// q = { a.x0, a.y0, x0, y0 }
const q = @shuffle(T, a.xyxy, self.xyxy, [4]i32{ 0, 1, -1, -2 });
// w = { x1, y1, a.x1, a.y1 }
const w = @shuffle(T, a.xyxy, self.xyxy, [4]i32{ -3, -4, 2, 3 });
// Each rect has to start before the other one ends, on both axes.
return @reduce(.And, q <= w);
}
};
}
```
### Assembly ###
This is produced by godbolt, which apparently has AVX512 extensions, so, it's extremely compact.
Note: The function prologue and epilogue are omitted; with inlining you can expect the code to look similar.
Zig calling convention is used, which is roughly equal to C's static marked procedure.
For 32bit floating point:
```asm
"example.RectSIMD(f32).isPointWithin":
vmovaps xmm2, xmm0
vmovapd xmm1, xmmword ptr [rdi]
vunpcklpd xmm0, xmm1, xmm2
vblendpd xmm1, xmm1, xmm2, 1
vcmpleps k0, xmm0, xmm1
kmovd eax, k0
sub al, 15
sete al
"example.RectSIMD(f32).isRectWithin":
vmovaps xmm2, xmmword ptr [rsi]
vmovaps xmm0, xmm2
vmovddup xmm1, qword ptr [rdi]
vinsertf128 ymm0, ymm0, xmm1, 1
vmovddup xmm3, qword ptr [rdi + 8]
vmovaps xmm1, xmm3
vinsertf128 ymm1, ymm1, xmm2, 1
vcmpleps k0, ymm0, ymm1
kortestb k0, k0
setb al
"example.RectSIMD(f32).isRectIntersecting":
vmovapd xmm2, xmmword ptr [rsi]
vmovapd xmm1, xmmword ptr [rdi]
vunpcklpd xmm0, xmm2, xmm1
vunpckhpd xmm1, xmm1, xmm2
vcmpleps k0, xmm0, xmm1
kmovd eax, k0
sub al, 15
sete al
```
For 32bit signed integers it fares amazing too:
```asm
"example.RectSIMD(i32).isPointWithin":
vmovaps xmm1, xmm0
vmovdqa xmm2, xmmword ptr [rdi]
vpunpcklqdq xmm0, xmm2, xmm1
vpblendd xmm1, xmm1, xmm2, 12
vpcmpled k0, xmm0, xmm1
kmovd eax, k0
sub al, 15
sete al
"example.RectSIMD(i32).isRectWithin":
vmovdqa xmm2, xmmword ptr [rsi]
vmovaps xmm0, xmm2
vpbroadcastq xmm1, qword ptr [rdi]
vinserti128 ymm0, ymm0, xmm1, 1
vpbroadcastq xmm3, qword ptr [rdi + 8]
vmovaps xmm1, xmm3
vinserti128 ymm1, ymm1, xmm2, 1
vpcmpled k0, ymm0, ymm1
kortestb k0, k0
setb al
"example.RectSIMD(i32).isRectIntersecting":
vmovdqa xmm2, xmmword ptr [rsi]
vmovdqa xmm1, xmmword ptr [rdi]
vpunpcklqdq xmm0, xmm2, xmm1
vpunpckhqdq xmm1, xmm1, xmm2
vpcmpled k0, xmm0, xmm1
kmovd eax, k0
sub al, 15
sete al
```
64bit floating point:
```asm
"example.RectSIMD(f64).isPointWithin":
vmovaps xmm3, xmm0
vmovapd ymm1, ymmword ptr [rdi]
vinsertf128 ymm0, ymm1, xmm3, 1
vmovaps xmm2, xmm3
vblendpd ymm1, ymm1, ymm2, 3
vcmplepd k0, ymm0, ymm1
kmovd eax, k0
sub al, 15
sete al
"example.RectSIMD(f64).isRectWithin":
vmovapd ymm2, ymmword ptr [rsi]
vmovapd ymm1, ymmword ptr [rdi]
vmovaps ymm0, ymm2
vpermpd ymm3, ymm1, 68
vinsertf64x4 zmm0, zmm0, ymm3, 1
vpermpd ymm3, ymm1, 238
vmovaps ymm1, ymm3
vinsertf64x4 zmm1, zmm1, ymm2, 1
vcmplepd k0, zmm0, zmm1
kortestb k0, k0
setb al
"example.RectSIMD(f64).isRectIntersecting":
vmovapd ymm2, ymmword ptr [rsi]
vmovapd ymm1, ymmword ptr [rdi]
vperm2f128 ymm0, ymm2, ymm1, 32
vperm2f128 ymm1, ymm1, ymm2, 49
vcmplepd k0, ymm0, ymm1
kmovd eax, k0
sub al, 15
sete al
```
AVX512 makes it so that there's no big penalty for double precision types, which is nice.
### Edits ###
- Reordered to use packed vectors without swizzling when possible.
- Eliminated redundant computations.
- Calling convention notice.

View File

@@ -1,7 +1,7 @@
Title: Slim Summer Elf Title: Slim Summer Elf
Brief: Making of minimal x86 (Linux) ELF executable. Brief: Making of minimal x86 (Linux) ELF executable.
Date: 1684666702 Date: 1684666702
Tags: Programming, Linux, C Tags: Programming, Linux, C, Bash, Linker, Low-level
CSS: /style.css CSS: /style.css
Code below was composed for [4mb-jam](https://itch.io/jam/4mb-jam-2023) which I didn't finish. Code below was composed for [4mb-jam](https://itch.io/jam/4mb-jam-2023) which I didn't finish.
@@ -101,8 +101,6 @@ exit;
#ifdef ELF #ifdef ELF
/* https://github.com/Jorengarenar/CMObALL/blob/master/cmoball.h */
/* https://man7.org/linux/man-pages/man2/exit.2.html */ /* https://man7.org/linux/man-pages/man2/exit.2.html */
#define SYS_EXIT(p_return_code) \ #define SYS_EXIT(p_return_code) \
{ \ { \

View File

@@ -0,0 +1,36 @@
Title: Geonames Toponym Extractor Utility
Brief: Simple script for extracting ASCII toponym fields from geonames datasets
Date: 1713683410
Tags: Python, Script, Programming
CSS: /style.css
[Link to code](https://codeberg.org/veclavtalica/geonames-extractor)
Small script I used for extracting data for machine learning endeavors.
Usage:
```
dataset feature_class [feature_code] [--dirty] [--filter=mask]
```
From this invocation ...
```
./extractor.py datasets/UA.txt P PPL --filter=0123456789\"\'-\` > UA-prep.txt
```
... it produces a newline separated list of relevant toponyms of particular kind, such as:
```
Katerynivka
Vaniushkyne
Svistuny
Sopych
Shilova Balka
```
`--filter=` option is there so that the alphabet size can be reduced for learning purposes,
as there are usually quite a lot of symbols that are found only a few times,
which produces poor balancing.
`--dirty` option reduces cases such as `Maydan (Ispas)` and `CHAYKA-Transmitter, Ring Mast 4` to `Maydan` and `CHAYKA-Transmitter`.
Duplicates are also removed.

BIN
articles/vector-pi-rotation/.static/noise.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,141 @@
Title: Optimized Vector Rotation
Brief: Specialized rotation methods over Pi and Pi/2 in 2D and 3D.
Date: 1699548646
Tags: Programming, Zig, Optimization
CSS: /style.css
Came up with some useful optimization for 90 and 180 degree rotations while making a grid walker,
below implementations are given, ripped straight from source, lol.
Compared to the generic cos/sin method of rotation, it's orders of magnitude less work in many cases, especially if the glibc implementation is used.
Note: Given example assumes coordinate system where Y grows downwards and X to the right.
## Two dimensions
```zig
pub fn rotateByHalfPiClockwise(self: Self) Self {
return .{ .components = .{ -self.y(), self.x() } };
}
pub fn rotateByHalfPiCounterClockwise(self: Self) Self {
return .{ .components = .{ self.y(), -self.x() } };
}
pub fn rotateByPi(self: Self) Self {
return .{ .components = .{ -self.x(), -self.y() } };
}
```
## Three dimensions
```zig
pub fn rotateByHalfPiClockwiseAroundAxis(self: Self, axis: Axis3) Self {
return .{ .components = switch (axis) {
.x => .{ self.x(), -self.z(), self.y() },
.y => .{ -self.z(), self.y(), self.x() },
.z => .{ -self.y(), self.x(), self.z() },
} };
}
pub fn rotateByHalfPiCounterClockwiseAroundAxis(self: Self, axis: Axis3) Self {
return .{ .components = switch (axis) {
.x => .{ self.x(), self.z(), -self.y() },
.y => .{ self.z(), self.y(), -self.x() },
.z => .{ self.y(), -self.x(), self.z() },
} };
}
/// Rotates the vector by Pi (180 degrees) around the given axis.
/// The component along the rotation axis is kept as is, the two remaining
/// components are negated; clockwise and counter-clockwise give the same result.
pub fn rotateByPiAroundAxis(self: Self, axis: Axis3) Self {
    return .{ .components = switch (axis) {
        // Was `{ x, -x, -y }`, which dropped z entirely; the generated assembly
        // listed for the X case negates y and z, matching the form below.
        .x => .{ self.x(), -self.y(), -self.z() },
        .y => .{ -self.x(), self.y(), -self.z() },
        .z => .{ -self.x(), -self.y(), self.z() },
    } };
}
```
## Generated amd64 assembly
Note: Procedure prelude/epilogue is omitted. Zig's calling convention is used, which is roughly equivalent to C's static marked function in effect.
Note: It's for vectors stored packed for use in SSE, array/separate scalar passing produces worse result, at least when not inlined.
### rotateByHalfPiClockwise
Notice how it's one instruction longer than the counter-clockwise case,
so the choice of coordinate system affects the cost of rotating in a particular direction.
```asm
vmovlpd qword ptr [rsp], xmm0
vmovshdup xmm1, xmm0
vpbroadcastd xmm2, dword ptr [rip + .LCPI2_0]
vpxor xmm1, xmm1, xmm2
vbroadcastss xmm0, xmm0
vblendps xmm0, xmm0, xmm1, 1
```
### rotateByHalfPiCounterClockwise
```asm
vmovlpd qword ptr [rsp], xmm0
vpbroadcastd xmm1, dword ptr [rip + .LCPI1_0]
vpxor xmm1, xmm0, xmm1
vmovshdup xmm0, xmm0
vinsertps xmm0, xmm0, xmm1, 16
```
### rotateByPi
```asm
vmovlpd qword ptr [rsp], xmm0
vpermilps xmm0, xmm0, 212
vpbroadcastd xmm1, dword ptr [rip + .LCPI3_0]
vpxor xmm0, xmm0, xmm1
```
### rotateByHalfPiClockwiseAroundAxis (X)
```asm
sub rsp, 24
vmovq qword ptr [rsp], xmm0
vpermilpd xmm1, xmm0, 1
vmovaps xmm2, xmm1
vmovss dword ptr [rsp + 8], xmm2
vpbroadcastd xmm2, dword ptr [rip + .LCPI4_0]
vpxor xmm1, xmm1, xmm2
vpermilps xmm0, xmm0, 212
vinsertps xmm0, xmm0, xmm1, 16
add rsp, 24
```
### rotateByHalfPiCounterClockwiseAroundAxis (X)
Again, one instruction shorter.
```asm
sub rsp, 24
vextractps dword ptr [rsp + 8], xmm0, 2
vmovq qword ptr [rsp], xmm0
vmovshdup xmm1, xmm0
vpbroadcastd xmm2, dword ptr [rip + .LCPI5_0]
vpxor xmm1, xmm1, xmm2
vpermilps xmm0, xmm0, 232
vinsertps xmm0, xmm0, xmm1, 32
add rsp, 24
```
### rotateByPiAroundAxis (X)
Now it's more work.
```asm
sub rsp, 24
vmovq qword ptr [rsp], xmm0
vpermilpd xmm1, xmm0, 1
vmovaps xmm2, xmm1
vmovss dword ptr [rsp + 8], xmm2
vmovshdup xmm2, xmm0
vbroadcastss xmm3, dword ptr [rip + .LCPI6_0]
vpxor xmm2, xmm2, xmm3
vpxor xmm1, xmm1, xmm3
vinsertps xmm0, xmm0, xmm2, 16
vinsertps xmm0, xmm0, xmm1, 32
add rsp, 24
```

View File

@@ -1,11 +0,0 @@
Title: .xm Tracks
Brief: .xm track listing with playback.
Date: 1684868476
Tags: Meta, Music, Js
CSS: /style.css
Today is the day of .xm archival and in-browser playback addition.
It's achieved with help of [jsxm](https://github.com/a1k0n/jsxm).
It lacks support for some things though, might need to contribute to it.
Could be neat to make some small program that would generate waveforms pngs next.

18
browse.sh Executable file
View File

@@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Terminal browser for a published blog instance: asks for the site URL,
# downloads its plaintext article listing and shows the selected article's
# markdown source in a pager until the selection comes back empty.
# Run this from the repository root (the selector widget is addressed relatively).

set +e

printf "%s" "Enter URL: "
# -r keeps backslashes in the typed URL intact.
read -r URL
# Drop a single trailing slash so the requests below don't contain "//".
URL=${URL%/}

# -f makes curl fail on HTTP errors instead of handing an error page over as the listing.
if ! articles=$(curl -sf "$URL/articles.txt"); then
    echo "Could not fetch the article listing from $URL" >&2
    exit 1
fi

# The listing holds percent-encoded names; decode them for display and for the markdown paths.
articles=$(echo -n "$articles" | python3 -c "import sys; from urllib.parse import unquote; print(unquote(sys.stdin.read()));")

# Without entries the selector prints a complaint on stdout, which would then be
# mistaken for an article name and requested over and over again.
if [ -z "$articles" ]; then
    echo "The article listing is empty." >&2
    exit 1
fi

while :
do
    # $articles is left unquoted on purpose: word splitting turns every listing
    # entry into its own argument (names containing whitespace get split as well).
    article=$(./tools/widgets/list_selector.py --desc="Select an article:" --result="line" -- $articles)
    if [ -z "$article" ]; then
        break
    fi
    # Honour the user's pager when one is configured; `pager` stays the fallback.
    curl -sf "$URL/markdown/$article.md" | ${PAGER:-pager}
done

View File

@@ -1,17 +1,46 @@
#!/bin/sh #!/usr/bin/env bash
set +e set +e
# Settings:
# =========
export CC=cc
mkdir -p ./html/articles mkdir -p ./html/articles
./tools/main_page_generator.py ./articles | ./tools/mmd/build/multimarkdown > ./html/index.html ./tools/main_page_generator.py ./articles | ./tools/mmd/build/multimarkdown > ./html/index.html
for d in ./articles/*; do for d in ./articles/*/; do
if [ -d "$d" ]; then if [ -d "$d" ]; then
if test -f "$d/make"; then
("$d/make")
fi
if test -d "$d/.dynamic"; then
mkdir -p "./html/articles/$(basename -- $d)"
cp -r "$d/.dynamic/." "./html/articles/$(basename -- $d)/"
fi
if test -d "$d/.static"; then
mkdir -p "./html/articles/$(basename -- $d)"
cp -r "$d/.static/." "./html/articles/$(basename -- $d)/"
fi
./tools/article_wrapper.py "$d/page.mmd" | ./tools/mmd/build/multimarkdown > "./html/articles/$(basename -- $d).html" ./tools/article_wrapper.py "$d/page.mmd" | ./tools/mmd/build/multimarkdown > "./html/articles/$(basename -- $d).html"
fi fi
done done
./tools/feed_generator.py ./articles/ https://mjestecko.neocities.org/ > ./html/feed.xml mkdir -p "./html/tags/"
./tools/tag_listing_generator.py ./articles/ ./html/ | ./tools/mmd/build/multimarkdown > "./html/tags.html"
./tools/track_listing_generator.py html/tracks > html/tracks.html for f in ./html/tags/*.html; do
echo $(cat "$f" | ./tools/mmd/build/multimarkdown) > "$f"
done
./tools/feed_generator.py ./articles/ > ./html/feed.xml
./tools/plaintext_article_listing_generator.py ./articles/ > ./html/articles.txt
mkdir -p "./html/markdown/"
for d in ./articles/*/; do
if [ -d "$d" ]; then
cp "$d/page.mmd" "./html/markdown/$(basename $d).md"
fi
done

45
config.py Normal file
View File

@@ -0,0 +1,45 @@
from random import choice
## Title of the blog
## Used for default first navbar entry to "/" root.
##
title = "mjestečko"
## Final hosting address, used in RSS feed absolute links as well as previews.
##
address = "https://mjestecko.neocities.org"
## Shows on top of every page providing navigation.
## Every entry forms a <li><a> child element of <nav>,
## where each dictionary pair forms an attached xml property.
##
navbar = [
("source", { "href": "https://git.poto.cafe/veclavtalica/mjestecko" }),
("rss", { "href": "/feed.xml" }),
("about", { "href": "/articles/mjestečko.html" }),
("tracks", { "href": "https://modarchive.org/index.php?request=view_artist_modules&query=96070" }),
("mastodon", { "href": "https://poto.cafe/@veclavtalica", "rel": "me" }),
]
## Optional description that will be shown on top of the main page.
## Could be plain text or callable with no parameters.
##
description = lambda: f"Personal blog of one {choice(adjectives)} Veclav Talica."
adjectives = ["*wild*", "**wacky**", "very humble", "**most serious**"]
## Optional link to logo image that will appear on top of the main page.
##
logo = "/logo.png"
## Language specifier, used in RSS feed.
##
language = "en"
## Port that is used to listen for remote git push signals.
##
webhook_port = 14032
## Something that only git hosting and your server should know.
## See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Authorization
##
webhook_auth = "Basic you-secure-credentials"

View File

@@ -1,9 +0,0 @@
#!/usr/bin/bash
set +e
shopt -s extglob
for f in $1/!(*AutoSave*).xm
do
gzip -c $f | base64 --wrap=0 - | tr -d '\n' > $2/$(basename -- $f).txt
done

BIN
html/logo.png (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -1,8 +1,37 @@
@media (prefers-color-scheme: dark) {
:root {
--bg-color: #111;
--fg-color: #fff;
--hover-color: #e0e0e0;
--link-color: dodgerblue;
--nav-border-color: #bbbbdd;
}
}
@media only screen and (max-device-width: 480px) {
article, .container {
width: 100%;
}
html {
font-size: 13px;
}
}
* {
color: var(--fg-color);
text-decoration: none;
}
a {
color: var(--link-color);
}
html { html {
font-size: 20px; font-size: 20px;
font-family: "Charter","Georgia",'Times New Roman',serif; font-family: "Charter", "Georgia", "Times New Roman", serif;
} }
body { body {
background-color: var(--bg-color);
line-height: 1.3; line-height: 1.3;
} }
article, .container { article, .container {
@@ -29,7 +58,7 @@ html {
margin: 0.9em 0px 0.9em; margin: 0.9em 0px 0.9em;
} }
pre, code { pre, code {
background-color: #eeeeff; background-color: var(--bg-color);
line-height: normal; line-height: normal;
overflow: auto; overflow: auto;
font-size: 0.8em; font-size: 0.8em;
@@ -38,7 +67,7 @@ html {
padding: 0.5em 0.5em; padding: 0.5em 0.5em;
border-radius: 4px; border-radius: 4px;
} }
.nav ul { nav ul {
padding-left: 0; padding-left: 0;
border-radius: 4px; border-radius: 4px;
list-style: none; list-style: none;
@@ -46,39 +75,41 @@ html {
flex-direction: row; flex-direction: row;
flex-wrap: nowrap; flex-wrap: nowrap;
font-size: 0.8em; font-size: 0.8em;
background-color: #f0f0f0; background-color: var(--bg-color);
border: 1px solid #bbbbdd; border: 1px solid var(--nav-border-color);
overflow: scroll;
} }
.nav li:first-child { nav li:first-child {
border-left: none; border-left: none;
} }
.nav li { nav li {
border-right: 1px solid #bbbbdd; border-right: 1px solid var(--nav-border-color);
border-left: 1px solid #ffffff; border-left: 1px solid var(--fg-color);
} }
.nav a { nav a {
padding: 0.4em 1em; padding: 0.4em 1em;
display: inline-block; display: inline-block;
text-decoration: none; text-decoration: none;
font-family: arial, sans; font-family: arial, sans;
} }
.nav a:hover { nav a:hover {
background-color: #e0e0e0; background-color: var(--hover-color);
} }
.nav a:link { nav a:link {
color: #44d; color: var(--link-color);
} }
.nav a:visited { nav a:visited {
color: #44d; color: var(--link-color);
} }
.nav a.here { nav a.here {
background-color: #e0e0e0; background-color: var(--hover-color);
}
@media only screen and (max-device-width: 480px) {
article, .container {
width: 100%;
}
html {
font-size: 13px;
} }
img {
max-width: 100%;
height: auto;
width: auto;
margin-left: auto;
margin-right: auto;
display: block;
} }

1351
html/xm.js

File diff suppressed because it is too large Load Diff

3
local_host.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/usr/bin/env bash
python3 -m http.server --directory ./html/ & xdg-open http://0.0.0.0:8000/

40
new-article.sh Executable file
View File

@@ -0,0 +1,40 @@
#!/usr/bin/env bash

# Interactively scaffolds a new article: asks for the title, directory name,
# brief and tags, creates ./articles/<directory>/page.mmd with the metadata
# header filled in and opens it with xdg-open when that is available.
# Run this from the repository root.

# NOTE(review): `set +e` merely restates the shell default (errexit off),
# so every failure that matters is checked explicitly below.
set +e

# -r keeps backslashes typed by the user intact.
printf "%s" "Enter title: "
read -r title

printf "%s" "Enter directory name: "
read -r directory

if [ -z "$directory" ]; then
    echo "Directory name can't be empty, aborted." >&2
    exit 1
fi

if [ -d "./articles/$directory/" ]; then
    echo "Directory already exists, aborted."
    # Non-zero status so that callers can tell the article was not created.
    exit 1
fi

printf "%s" "Enter brief: "
read -r brief

printf "%s" "Enter tags: "
read -r tags

if ! mkdir "./articles/$directory/"; then
    echo "Could not create ./articles/$directory/, aborted." >&2
    exit 1
fi

# Seconds since the epoch, the format the Date field of existing articles uses.
date=$(date +%s)

# User input is passed as arguments and never becomes part of the format string.
# The final \n terminates the last metadata line.
printf 'Title: %s\nBrief: %s\nDate: %s\nTags: %s\nCSS: /style.css\n' \
    "$title" "$brief" "$date" "$tags" \
    > "./articles/$directory/page.mmd"

# `command -v` is the portable shell builtin for what `which` was used for.
if command -v xdg-open > /dev/null 2>&1; then
    xdg-open "./articles/$directory/page.mmd"
fi

View File

@@ -1,9 +1,9 @@
#!/bin/sh #!/usr/bin/env bash
git submodule init set +e
git submodule update
cd tools/mmd git submodule update --init --recursive
make release git-lfs fetch
cd build
make (cd tools/mmd && make release)
cd ../.. (cd tools/mmd/build && make)

3
remote_host.sh Executable file
View File

@@ -0,0 +1,3 @@
#!/usr/bin/env bash
python3 -m http.server --directory ./html/ & python3 ./tools/git_webhook.py

View File

@@ -1,8 +1,10 @@
import time, subprocess import time, subprocess
from os import walk, path
import urllib.parse
def the_line_after_metadata(lines: []) -> int: def the_line_after_metadata(lines: []) -> int:
i = 0 i = 0
while lines[i].strip(): while i < len(lines) and lines[i].strip():
i += 1 i += 1
return i return i
@@ -18,9 +20,24 @@ def parse_metadata(filepath: str) -> {}:
if key == "Date": if key == "Date":
result["Date"] = time.gmtime(int(val)) result["Date"] = time.gmtime(int(val))
elif key == "Tags": elif key == "Tags":
result["Tags"] = val.split(",") result["Tags"] = [v.strip() for v in val.split(",")]
else: elif val:
result[key] = val result[key] = val
result["Last Edit"] = time.gmtime(int(subprocess.getoutput(r"stat -c %Y " + filepath))) result["Last Edit"] = time.gmtime(int(subprocess.getoutput(r"stat -c %Y " + filepath)))
return result return result
def parse_article_directory(directory: str) -> {}:
    """Gather the metadata of every article found directly under *directory*.

    Each immediate subdirectory is treated as one article whose metadata is
    read from its ``page.mmd`` file by ``parse_metadata``.

    Returns a dict keyed by the percent-encoded subdirectory name; every value
    is a dict with a single ``"metadata"`` entry. The result is empty when
    ``walk`` yields nothing for *directory* or there are no subdirectories.
    """
    # Only the first tuple produced by walk() is of interest: it lists the
    # immediate subdirectories, deeper levels are never visited.
    top_level = next(walk(directory), None)
    if top_level is None:
        return {}

    root, subdirectories, _ = top_level
    return {
        urllib.parse.quote(name): {
            "metadata": parse_metadata(path.abspath(f"{root}/{name}/page.mmd")),
        }
        for name in subdirectories
    }
def sort_titles_by_date(articles: {}) -> []:
    """Return the keys of *articles* ordered from the newest to the oldest.

    The sort key is the ``"Date"`` entry of each article's ``"metadata"`` dict;
    articles without one are ranked as if dated at the Unix epoch.
    """
    def publication_date(key):
        # Missing dates fall back to the epoch.
        return articles[key]["metadata"].get("Date", time.gmtime(0))

    return sorted(articles, key=publication_date, reverse=True)

View File

@@ -1,12 +1,16 @@
#!/usr/bin/python3 #!/usr/bin/env python3
# todo: Show related git history of a file? # todo: Show related git history of a file?
from sys import argv, exit from sys import argv, exit
import time import time, urllib.parse, re
import os.path as path
from article_utils import the_line_after_metadata, parse_metadata from article_utils import the_line_after_metadata, parse_metadata
from page_shares import wrap_page, MONTHS from page_builder import wrap_page
from date_descriptions import MONTHS
import config
if len(argv) <= 1: if len(argv) <= 1:
print("No file was supplied") print("No file was supplied")
@@ -17,9 +21,10 @@ with open(argv[1], "r") as f:
i = the_line_after_metadata(content) i = the_line_after_metadata(content)
metadata = parse_metadata(argv[1]) metadata = parse_metadata(argv[1])
directory = path.split(path.dirname(path.abspath(argv[1])))[-1]
title = metadata.get("Title", "Oopsie, somebody forgot to name the article!") title = metadata["Title"]
article_head = "\n# " + title + "\n---\n" article_head = "\n# " + title + "\n"
brief = metadata.get("Brief") brief = metadata.get("Brief")
if not brief is None: if not brief is None:
@@ -35,11 +40,25 @@ if not last_edit is None:
last_edit.tm_mday != date.tm_mday or last_edit.tm_year != date.tm_year: last_edit.tm_mday != date.tm_mday or last_edit.tm_year != date.tm_year:
article_head += f"-- Edited: *{MONTHS[last_edit.tm_mon]} {last_edit.tm_mday}, {last_edit.tm_year} UTC*\n\n" article_head += f"-- Edited: *{MONTHS[last_edit.tm_mon]} {last_edit.tm_mday}, {last_edit.tm_year} UTC*\n\n"
# todo: Hyperlinks to appropriate tag pages.
tags = metadata.get("Tags") tags = metadata.get("Tags")
if tags: if tags:
article_head += f"""-- Tags: *{",".join(tags)}*\n\n""" tag_links = []
for tag in tags:
tag_links.append(f"[{tag}](/tags/{urllib.parse.quote(tag.lower())}.html)")
article_head += f"""-- Tags: *{", ".join(tag_links)}*\n\n"""
article_head += "---\n\n" article_head += "---\n\n"
print(''.join(content[:i]) + wrap_page(article_head + ''.join(content[i:]))) header = f"""HTML header: <meta property="og:title" content="{title} on mjestečko"></meta>
<meta property="og:type" content="article"></meta>
<meta property="og:url" content="{config.address}/articles/{urllib.parse.quote(directory)}.html"></meta>
<meta name="viewport" content="width=device-width, initial-scale=1">
"""
if not brief is None:
header += f"""<meta property="og:description" content="{brief}"></meta>\n"""
front_image = re.compile(r"!\[.*\]\((.+?)\)", re.DOTALL).search(''.join(content[i:]))
if not front_image is None:
header += f"""<meta property="og:image" content="{config.address}/{urllib.parse.quote(front_image.group(1))}"></meta>\n"""
print(header + ''.join(content[:i]) + wrap_page(article_head + ''.join(content[i:])))

1
tools/config.py Symbolic link
View File

@@ -0,0 +1 @@
../config.py

View File

@@ -0,0 +1,24 @@
MONTHS = {
1: "January",
2: "February",
3: "March",
4: "April",
5: "May",
6: "June",
7: "July",
8: "August",
9: "September",
10: "October",
11: "November",
12: "December"
}
WEEKDAYS = {
0: "Monday",
1: "Tuesday",
2: "Wednesday",
3: "Thursday",
4: "Friday",
5: "Saturday",
6: "Sunday"
}

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3 #!/usr/bin/env python3
from sys import argv, exit from sys import argv, exit
from os import walk, path from os import walk, path
@@ -7,28 +7,23 @@ from textwrap import indent
import time, urllib.parse, re, subprocess import time, urllib.parse, re, subprocess
from article_utils import parse_metadata from article_utils import parse_metadata
from page_shares import ADJECTIVES
from rfc822 import stringify_date from rfc822 import stringify_date
import config
if len(argv) <= 1: if len(argv) <= 1:
print("No directory was supplied") print("No directory was supplied")
exit(-1) exit(-1)
if len(argv) <= 2:
print("No address was supplied")
exit(-1)
seed() seed()
address = argv[2]
# todo: Find the latest pubDate # todo: Find the latest pubDate
feed = f"""<rss version="2.0"> feed = f"""<rss version="2.0">
<channel> <channel>
<title>mjestečko</title> <title>{config.title}</title>
<link>{address}</link> <link>{config.address}</link>
<description>Personal blog of one {choice(ADJECTIVES)} Veclav Talica</description> <description>{config.description() if callable(config.description) else config.description}</description>
<language>en</language> <language>{config.language}</language>
<lastBuildDate>{stringify_date(time.gmtime(int(time.time())))}</lastBuildDate> <lastBuildDate>{stringify_date(time.gmtime(int(time.time())))}</lastBuildDate>
""" """
@@ -54,9 +49,10 @@ for root, dirs, _ in walk(argv[1]):
f""" <pubDate>{stringify_date(metadata["Date"])}</pubDate>\n""" f""" <pubDate>{stringify_date(metadata["Date"])}</pubDate>\n"""
feed += ( feed += (
f""" <guid>/articles/{d}</guid>\n""" f""" <guid>/articles/{d}</guid>\n"""
f""" <link>{address}/articles/{urllib.parse.quote(d)}</link>\n""" f""" <link>{config.address}/articles/{urllib.parse.quote(d)}</link>\n"""
" </item>\n" " </item>\n"
) )
break
feed += """ </channel> feed += """ </channel>
</rss>""" </rss>"""

1
tools/gifenc Submodule

Submodule tools/gifenc added at 87acd487df

56
tools/git_webhook.py Normal file
View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
from http.client import parse_headers
from http.server import BaseHTTPRequestHandler
from http.server import HTTPServer
import subprocess
import config
## Simple way to automatically pull and recompile on a remote server.
## Run this from the root directory.
##
## Currently supports:
## - Gitea (Via GET method).
##
class HttpHandler(BaseHTTPRequestHandler):
    """Webhook endpoint that pulls and recompiles the site on a Gitea push event."""

    def do_GET(self):
        """Handle one webhook call.

        The request is accepted only when it carries ``X-Gitea-Event: push``
        and an ``Authorization`` header equal to ``config.webhook_auth``.
        Anything else is answered with 400 and nothing is executed. Accepted
        requests are answered with 200, then ``git pull`` and ``./compile.sh``
        are run from the current working directory.
        """
        # Local import so that the block doesn't depend on the file's import list.
        import hmac

        # HTTP header names are case-insensitive; Message.get() honours that,
        # unlike comparing the raw names yielded by iterating over the headers.
        got_gitea_push_event = self.headers.get("X-Gitea-Event", "") == "push"
        # Constant-time comparison, as the secret is checked against untrusted
        # input; both sides are encoded because compare_digest() accepts
        # str arguments only when they are pure ASCII.
        got_auth = hmac.compare_digest(
            self.headers.get("Authorization", "").encode("utf-8"),
            config.webhook_auth.encode("utf-8"),
        )

        if not got_gitea_push_event or not got_auth:
            self._respond(400)
            return

        # The response is completed before the slow part starts, so the caller
        # is released right away.
        # todo: Pulling and compiling still block this server until done. Not ideal.
        self._respond(200)

        subprocess.run(["git", "pull"])
        subprocess.run(["./compile.sh"])

        print("Pulled and recompiled.")

    def _respond(self, status):
        """Send a complete, body-less response with the given status code."""
        self.send_response(status)
        self.send_header("Content-Length", "0")
        # send_response() only buffers the status line and headers;
        # without end_headers() nothing ever reaches the client.
        self.end_headers()
def run(server_class=HTTPServer, handler_class=HttpHandler):
    """Serve webhook requests on ``config.webhook_port`` until interrupted.

    ``server_class`` is instantiated with an ``('', port)`` address (all
    interfaces) and ``handler_class``; the call blocks in ``serve_forever()``.
    """
    server_address = ('', config.webhook_port)
    httpd = server_class(server_address, handler_class)
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl+C is the expected way of stopping the service; leave quietly.
        pass
    finally:
        # Release the listening socket on every exit path, not only on Ctrl+C.
        httpd.server_close()
if __name__ == "__main__":
run()

View File

@@ -1,12 +1,14 @@
#!/usr/bin/python3 #!/usr/bin/env python3
from sys import argv, exit from sys import argv, exit
from os import walk, path from random import seed
from random import choice, seed import time
import time, urllib.parse
from article_utils import parse_metadata from article_utils import parse_article_directory, sort_titles_by_date
from page_shares import wrap_page, ADJECTIVES, MONTHS from page_builder import wrap_page
from date_descriptions import MONTHS
import config
if len(argv) <= 1: if len(argv) <= 1:
print("No directory was supplied") print("No directory was supplied")
@@ -14,30 +16,29 @@ if len(argv) <= 1:
seed() seed()
page_metadata = """Title: mjestečko page_metadata = f"""Title: {config.title}
CSS: /style.css CSS: /style.css
HTML header: <meta name="viewport" content="width=device-width, initial-scale=1">
""" """
page = f"""Personal blog of one {choice(ADJECTIVES)} Veclav Talica. page = f"""![]({config.logo})
--- {config.description() if callable(config.description) else config.description}
### Articles ### ### Articles ###
""" """
# todo: Sort by date first. artciles = parse_article_directory(argv[1])
for root, dirs, _ in walk(argv[1]):
for d in dirs: for title in sort_titles_by_date(artciles):
metadata = parse_metadata(path.abspath(root + '/' + d + "/page.mmd")) article = artciles[title]
article = urllib.parse.quote(d) metadata = article["metadata"]
page += ( page += (
f"""[{metadata.get("Title", "No title given! What a clusterfuck!")}](/articles/{article}.html)\n\n""" f"""[{metadata.get("Title", "No title given! What a clusterfuck!")}](/articles/{title}.html)\n\n"""
f""">{metadata.get("Brief", "")}\n\n""" f""">{metadata.get("Brief", "")}\n\n"""
) )
if "Tags" in metadata:
page += f""">*{','.join(metadata["Tags"])}*\n---\n"""
curtime = time.gmtime(int(time.time())) curtime = time.gmtime(int(time.time()))
page += f"Last compiled: *{MONTHS[curtime.tm_mon]} {curtime.tm_mday}, {curtime.tm_year} {curtime.tm_hour}:{curtime.tm_min:02d} UTC*\n\n" page += f"Last compiled: *{MONTHS[curtime.tm_mon]} {curtime.tm_mday}, {curtime.tm_year} {curtime.tm_hour}:{curtime.tm_min:02d} UTC*\n\n"

43
tools/page_builder.py Normal file
View File

@@ -0,0 +1,43 @@
import config
_navbar = config.navbar.copy()
_navbar.insert(0, ('<strong>' + config.title + '</strong>', {"href": "/"}))
_navbar.append(("tags", {"href": "/tags.html"}))
_navbar_lis = '\n'.join(f"""<li><a {' '.join(
f'{p}="{v}"' for p, v in e[1].items())}>{e[0]}</a></li>"""
for e in _navbar)
_head = f"""
<div class="container">
<nav>
<ul>
{_navbar_lis}
</ul>
</nav>
"""
_footer = """
<footer>
<a href="#top">^ Return</a>
</footer>
"""
_tail = """
</div>
"""
def mixin_tag(content: str, tag: str) -> str:
    """Enclose *content* in an opening and a closing ``<tag>`` element.

    The opening tag is followed by a newline, *content* is inserted verbatim
    (no newline is added after it) and the closing tag ends with a newline.
    """
    opening = f"<{tag}>\n"
    closing = f"</{tag}>\n"
    return f"{opening}{content}{closing}"
def wrap_page(page: str) -> str:
    """Surround *page* with the shared page chrome.

    The text is placed into a ``<main>`` element and emitted between the
    module-level ``_head`` (container opening and navigation bar) and the
    ``_footer`` and ``_tail`` fragments, in that order.
    """
    main_section = mixin_tag(page, "main")
    return "".join((_head, main_section, _footer, _tail))

View File

@@ -1,59 +0,0 @@
HEAD_EMBED = """
<div style="display: flex;">
<div>
## mjestečko ##
<ul class="nav">
<li><a href="/">main page</a></li>
<li><a href="https://git.poto.cafe/veclavtalica/mjestecko">source</a></li>
<li><a href="/tracks.html">tracks</a></li>
<li><a href="/feed.xml">rss</a></li>
<li><a href="/articles/mjestečko.html">about</a></li>
</ul>
</div>
<div class="container">
"""
TAIL_EMBED = """
---
*Remember, - all you see here is free for use for any purpose whatsoever.*
</div>
"""
ADJECTIVES = ["*wild*", "**wacky**", "very humble", "**most serious**"]
MONTHS = {
1: "January",
2: "February",
3: "March",
4: "April",
5: "May",
6: "June",
7: "July",
8: "August",
9: "September",
10: "October",
11: "November",
12: "December"
}
WEEKDAYS = {
0: "Monday",
1: "Tuesday",
2: "Wednesday",
3: "Thursday",
4: "Friday",
5: "Saturday",
6: "Sunday"
}
def wrap_page(page: str) -> str:
return HEAD_EMBED + page + TAIL_EMBED

View File

@@ -0,0 +1,15 @@
#!/usr/bin/env python3
from sys import argv, exit
from article_utils import parse_article_directory, sort_titles_by_date
if len(argv) <= 1:
print("No directory was supplied")
exit(-1)
articles = parse_article_directory(argv[1])
result = '\n'.join(sort_titles_by_date(articles))
print(result)

View File

@@ -1,4 +1,4 @@
from page_shares import MONTHS, WEEKDAYS from date_descriptions import MONTHS, WEEKDAYS
def stringify_date(date) -> str: def stringify_date(date) -> str:
return f"{WEEKDAYS[date.tm_wday][:3]}, {date.tm_mday} {MONTHS[date.tm_mon][:3]} {date.tm_year} {date.tm_hour:02d}:{date.tm_min:02d}:{date.tm_sec:02d} GMT" return f"{WEEKDAYS[date.tm_wday][:3]}, {date.tm_mday} {MONTHS[date.tm_mon][:3]} {date.tm_year} {date.tm_hour:02d}:{date.tm_min:02d}:{date.tm_sec:02d} GMT"

61
tools/tag_listing_generator.py Executable file
View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python3
# todo: Problems might arise with casing if there are overlaps such as 'Tag' and 'tag'.
from sys import argv, exit
import time, urllib.parse, re
from os import walk
import os.path as path
from article_utils import the_line_after_metadata, parse_metadata
from page_builder import wrap_page
# todo: Reuse
tag_listing_header = """CSS: /style.css
HTML header: <meta name="viewport" content="width=device-width, initial-scale=1">
"""
main_listing_header = """CSS: /style.css
HTML header: <meta name="viewport" content="width=device-width, initial-scale=1">
"""
if len(argv) <= 1:
print("No article directory was supplied")
exit(-1)
if len(argv) <= 2:
print("No tag listing output directory was supplied")
exit(-1)
tag_to_tag_page = {}
tag_to_articles = {}
tag_counts = {}
article_to_title = {}
for root, dirs, _ in walk(argv[1]):
for d in dirs:
metadata = parse_metadata(path.abspath(root + '/' + d + "/page.mmd"))
article = "/articles/" + urllib.parse.quote(d) + ".html"
for tag in metadata.get('Tags', []):
tag_to_articles[tag] = tag_to_articles.get(tag, []) + [article]
tag_counts[tag] = tag_counts.get(tag, 0) + 1
article_to_title[article] = metadata['Title']
break
for tag in tag_to_articles:
tag_page = f"/tags/{urllib.parse.quote(tag.lower())}.html"
tag_to_tag_page[tag] = tag_page
with open(argv[2] + tag_page, 'w') as f:
tagged_article_listing = f"\n# Tagged {tag} #\n---\n" + \
'\n'.join(f"- [{article_to_title[article]}]({article})" \
for article in tag_to_articles[tag])
f.write(tag_listing_header + wrap_page(tagged_article_listing))
main_listing = "\n# Tag Listing #\n---\n" + \
', '.join(f"[{tag}]({tag_to_tag_page[tag]}) ({tag_counts[tag]})" \
for tag in sorted(tag_counts.keys(), key=lambda x: tag_counts[x], reverse=True)) + \
'\n\n'
print(main_listing_header + wrap_page(main_listing))

View File

@@ -1,98 +0,0 @@
#!/usr/bin/python3
from sys import argv, exit
from os import walk, path
# The directory holding the track files is a mandatory first argument.
if len(argv) < 2:
    print("No directory was supplied")
    exit(-1)

# Static part of the document that precedes the per-track rows. It embeds the
# script that fetches a track, decodes it from base64, gunzips it and hands
# the result to XMPlayer.
page_head = """<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta charset="utf-8"/>
<title>Tracks</title>
<link type="text/css" rel="stylesheet" href="/style.css"/>
<script type="text/javascript" src="/xm.js"></script>
<script type="text/javascript">
(function (window, document) {
if (!window.XMPlayer)
window.XMPlayer = {};
var XMPlayer = window.XMPlayer;
var was_init = false
// https://stackoverflow.com/questions/16245767/creating-a-blob-from-a-base64-string-in-javascript
function b64toBlob(base64) {
const decoded = atob(base64);
const uInt8Array = new Uint8Array(decoded.length);
for (let i = 0; i < decoded.length; ++i)
uInt8Array[i] = decoded.charCodeAt(i);
return new Blob([uInt8Array]);
}
async function decompressGzippedBase64(str) {
const ds = new DecompressionStream(`gzip`);
const decompressedStream = b64toBlob(str).stream().pipeThrough(ds);
return new Response(decompressedStream).blob();
}
window.loadAndPlayTrack = function(url) {
if (!was_init) {
XMPlayer.init();
was_init = true;
}
var request = new XMLHttpRequest();
request.responseType = `text`;
request.open('GET', url);
request.send();
request.onload = async(_) => {
if (request.readyState === 4 && request.status === 200) {
XMPlayer.stop();
XMPlayer.load(await (await decompressGzippedBase64(request.response)).arrayBuffer());
XMPlayer.play();
}
}
}
})(window, document)
</script>
</head>
<body>
<div style="display: flex;">
<div>
<h2 id="mjestečko">mjestečko</h2>
<ul class="nav">
<li><a href="/">main page</a></li>
<li><a href="https://git.poto.cafe/veclavtalica/mjestecko">source</a></li>
<li><a href="/tracks.html">tracks</a></li>
<li><a href="/feed.xml">rss</a></li>
<li><a href="/articles/mjestečko.html">about</a></li>
</ul>
</div>
<div class="container">
<h3 id="articles">Tracks</h3>
<p>.xm module tracks of my own. Btw, they're playable in browser :3</p>
<p>Note that some files are rendered incorrectly.</p>
<hr/>
"""

# Closing markup emitted after the last track row.
page_tail = """ </div>
</div>
</body>
</html>
"""

# One row (label + play button) per track file, in sorted order within each
# directory visited by walk().
track_rows = []
for _, _, names in walk(argv[1]):
    names.sort()
    for name in names:
        # note: Base64 gzip encoded data is expected.
        if name.endswith('.xm.txt'):
            track_rows.append(
                f""" <div><p style="display: inline;">{name[:-4]}</p><button style="float: right;" onclick="window.loadAndPlayTrack('/tracks/{name}')">play</button></div>\n"""
                " <hr/>\n"
            )

print(page_head + "".join(track_rows) + page_tail)

95
tools/widgets/list_selector.py Executable file
View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python3
from sys import argv
import curses
from wrapper import widget_wrapper
# Command line layout: [--result=index|line] [--desc=TEXT] -- ITEM [ITEM ...]
# Find the first `--`; everything after it is the list to choose from.
list_starting_arg = 1
try:
    separator_at = argv.index('--', 1)
except ValueError:
    print("List starting -- wasn't given")
    exit(-1)
if separator_at + 1 == len(argv):
    print("Empty list given")
    exit(-1)
list_starting_arg = separator_at + 1

desc_arg = ''
result_arg = 'index'

# todo: Generalize and simplify over descriptor object.
# Only the arguments in front of the separator are options.
for option in argv[1:separator_at]:
    if option.startswith('--result='):
        result_arg = option[option.find('=') + 1:]
        if result_arg not in ['index', 'line']:
            print("Invalid --result=")
            exit(-1)
    elif option.startswith('--desc='):
        desc_arg = option[option.find('=') + 1:]
    else:
        print("Unknown parameter ", option)
        exit(-1)

# Widget state shared by init(), draw_list_box() and driver().
current = 0
lines = argv[list_starting_arg:]
list_box = None
def init(screen):
    """Prepare curses and create the sub-window that holds the list.

    Sets the module-level ``list_box``. When a description was given
    (``desc_arg`` is non-empty) row 0 is left free for it and the list box
    starts on row 1; otherwise the list box starts on row 0.

    Previously the sub-window was only created when a description was
    supplied, which left ``list_box`` as ``None`` and made
    ``draw_list_box()`` fail when ``--desc=`` was omitted.
    """
    global list_box
    curses.start_color()
    curses.curs_set(0)
    # Reserve the first row for the description line, if there is one.
    top_row = 1 if desc_arg != '' else 0
    list_box = screen.subwin(top_row, 0)
def draw_list_box():
    """Redraw the bordered list box, highlighting the selected entry."""
    list_box.border()
    # Row 0 is taken by the border, so entries start on row 1.
    for row, text in enumerate(lines, start=1):
        is_selected = (row - 1) == current
        attr = curses.A_REVERSE if is_selected else curses.A_NORMAL
        list_box.addstr(row, 1, text, attr)
    list_box.refresh()
def driver(screen):
    """Run one draw/input step of the selector.

    Draws the description (if any) and the list, then reads a single key.
    Up/Down move the selection with wrap-around. Enter returns the result
    selected by ``result_arg``: the index as a string, or the line itself.
    Returns ``None`` for every other key so the caller keeps looping.
    """
    global current
    if desc_arg != '':
        screen.addstr(0, 0, desc_arg)
    draw_list_box()

    key = screen.getch()
    if key in (curses.KEY_ENTER, 10, 13):
        # 10 and 13 are the line feed / carriage return codes.
        if result_arg == 'index':
            return str(current)
        elif result_arg == 'line':
            return lines[current]
    elif key == curses.KEY_DOWN:
        current = (current + 1) % len(lines)
    elif key == curses.KEY_UP:
        current = (current - 1) % len(lines)

    screen.refresh()
if __name__ == "__main__":
    # The chosen index/line is the widget's output on stdout.
    selection = widget_wrapper(init, driver)
    print(selection)

47
tools/widgets/wrapper.py Normal file
View File

@@ -0,0 +1,47 @@
import curses
import signal
import os, sys
from sys import argv, exit
def handler(signum, frame):
    """Signal handler: leave curses mode, then exit with status 1."""
    curses.endwin()
    raise SystemExit(1)
# Callbacks of the currently running widget. widget_wrapper() stores them
# here and curses_wrapper() calls them: `init` once with the curses screen,
# `driver` repeatedly until it returns something other than None.
init = None
driver = None
def curses_wrapper(screen):
    """Run the widget's event loop on ``screen`` and return its result.

    Calls the module-level ``init`` callback once, then calls ``driver``
    repeatedly until it returns a value other than ``None``; that value is
    returned to the caller.
    """
    curses.noecho()
    curses.cbreak()
    screen.keypad(True)
    init(screen)
    while True:
        result = driver(screen)
        # Identity test: only None means "keep going"; an overloaded __eq__
        # on the result must not end (or prolong) the loop.
        if result is not None:
            return result
def widget_wrapper(p_init, p_driver):
    """Run a curses widget on the controlling terminal and return its result.

    Standard input, output and error are temporarily pointed at ``/dev/tty``
    while curses is active, then restored.

    Parameters:
        p_init:   callable taking the curses screen, called once before the loop.
        p_driver: callable taking the curses screen, called repeatedly until
                  it returns a value other than ``None``.

    Returns:
        The first non-``None`` value returned by ``p_driver``.

    Side effects:
        Installs ``handler`` for SIGINT and sets the module-level ``init``
        and ``driver`` callbacks.
    """
    global init, driver
    signal.signal(signal.SIGINT, handler)
    init = p_init
    driver = p_driver
    with open('/dev/tty', 'rb') as inf, open('/dev/tty', 'wb') as outf:
        # Keep duplicates of the original descriptors so they can be put back.
        saved_fds = [os.dup(fd) for fd in (0, 1, 2)]
        try:
            os.dup2(inf.fileno(), 0)
            os.dup2(outf.fileno(), 1)
            os.dup2(outf.fileno(), 2)
            result = curses.wrapper(curses_wrapper)
        finally:
            # Restore stdio even when the widget raises (including the
            # SystemExit from the SIGINT handler) and close the duplicates
            # so they are not leaked.
            for fd, saved in enumerate(saved_fds):
                os.dup2(saved, fd)
                os.close(saved)
    return result

30
upload.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Upload files under ./html/ (up to three levels deep) with the `neocities`
# CLI, skipping files whose content still matches the checksum recorded next
# to them in "<file>.upload-checksum".
#
# Options:
#   --fresh   delete all recorded checksums first, forcing a full re-upload.

# errexit stays off; presumably so that one failed command does not abort
# the remaining uploads.
set +e

# "$@" keeps every argument as its own word. "$*" would join them into a
# single string, so --fresh would only be recognised as the sole argument.
for arg in "$@"
do
  case "$arg" in
    "--fresh") find ./html/ -type f -name '*.upload-checksum' -delete
    ;;
  esac
done

for cur in ./html/{*,*/*,*/*/*}; do
  if [ -f "$cur" ] && [[ ! "$cur" == *.upload-checksum ]]; then
    if [ -f "$cur.upload-checksum" ]; then
      # sha256sum -c exits with status 0 only when the recorded checksum
      # matches the current file content.
      if sha256sum -c "$cur.upload-checksum" > /dev/null 2>&1; then
        echo "$cur is up-to-date, skipping"
        continue
      fi
    fi

    # NOTE(review): the checksum is recorded before the upload, as before, so
    # a failed upload is still treated as done on the next run - confirm
    # whether that is intended.
    sha256sum "$cur" > "$cur.upload-checksum"

    # Directory of the file relative to ./html, used as the remote directory.
    d=$(dirname "$(realpath --relative-to="./html" "$cur")")
    if [[ "$d" == "." ]]; then
      neocities upload "$cur"
    else
      neocities upload -d "$d" "$cur"
    fi
  fi
done