vector-pi-rotation article

2023-11-09 21:59:42 +05:00 · 2023-11-09 21:59:42 +05:00 · efa3391dbd
commit efa3391dbd
parent f2020f7463
1 changed files with 141 additions and 0 deletions
--- a/articles/vector-pi-rotation/page.mmd
+++ b/articles/vector-pi-rotation/page.mmd
@ -0,0 +1,141 @@
+Title:  Optimized Vector Rotation
+Brief:  Specialized rotation methods over Pi and Pi/2 in 2D and 3D.
+Date:   1699548646
+Tags:   Programming, Zig, Optimization
+CSS:    /style.css
+
+I came up with some useful optimizations for 90- and 180-degree rotations while making a grid walker.
+The implementations are given below, ripped straight from the source, lol.
+
+Compared to the generic cos/sin method of rotation, it's orders of magnitude less work in many cases, especially if the glibc implementation is used.
+
+Note: The given examples assume a coordinate system where Y grows downwards and X grows to the right.
+
+## Two dimensions
+
+```zig
+/// Quarter turn (Pi/2) clockwise: maps (x, y) to (-y, x).
+/// No trigonometry is involved, only a component swap and one negation.
+pub fn rotateByHalfPiClockwise(self: Self) Self {
+    const px = self.x();
+    const py = self.y();
+    return .{ .components = .{ -py, px } };
+}
+
+/// Quarter turn (Pi/2) counter-clockwise: maps (x, y) to (y, -x).
+/// Exact inverse of the clockwise quarter turn.
+pub fn rotateByHalfPiCounterClockwise(self: Self) Self {
+    const px = self.x();
+    const py = self.y();
+    return .{ .components = .{ py, -px } };
+}
+
+/// Half turn (Pi): negates both components, mapping (x, y) to (-x, -y).
+pub fn rotateByPi(self: Self) Self {
+    const px = self.x();
+    const py = self.y();
+    return .{ .components = .{ -px, -py } };
+}
+
+```
+
+## Three dimensions
+
+```zig
+/// Quarter turn (Pi/2) clockwise about the given coordinate axis.
+/// The component lying on `axis` is kept as is; the remaining two are
+/// swapped and one of them is negated.
+pub fn rotateByHalfPiClockwiseAroundAxis(self: Self, axis: Axis3) Self {
+    const vx = self.x();
+    const vy = self.y();
+    const vz = self.z();
+    // NOTE(review): under a single-handed (cyclic x -> y -> z) convention the
+    // .y case turns in the opposite sense to .x and .z - confirm this matches
+    // the intended viewing direction for the Y axis.
+    return switch (axis) {
+        .x => .{ .components = .{ vx, -vz, vy } },
+        .y => .{ .components = .{ -vz, vy, vx } },
+        .z => .{ .components = .{ -vy, vx, vz } },
+    };
+}
+
+/// Quarter turn (Pi/2) counter-clockwise about the given coordinate axis.
+/// The component lying on `axis` is kept as is; the remaining two are
+/// swapped and one of them is negated (the opposite one from the
+/// clockwise variant, making the two functions exact inverses).
+pub fn rotateByHalfPiCounterClockwiseAroundAxis(self: Self, axis: Axis3) Self {
+    const vx = self.x();
+    const vy = self.y();
+    const vz = self.z();
+    // NOTE(review): under a single-handed (cyclic x -> y -> z) convention the
+    // .y case turns in the opposite sense to .x and .z - confirm this matches
+    // the intended viewing direction for the Y axis.
+    return switch (axis) {
+        .x => .{ .components = .{ vx, vz, -vy } },
+        .y => .{ .components = .{ vz, vy, -vx } },
+        .z => .{ .components = .{ vy, -vx, vz } },
+    };
+}
+
+/// Half turn (Pi) about the given coordinate axis.
+/// The component lying on `axis` is preserved; the other two are negated.
+pub fn rotateByPiAroundAxis(self: Self, axis: Axis3) Self {
+    return .{ .components = switch (axis) {
+        // X is the rotation axis: x stays, y and z flip sign.
+        .x => .{ self.x(), -self.y(), -self.z() },
+        // Y is the rotation axis: y stays, x and z flip sign.
+        .y => .{ -self.x(), self.y(), -self.z() },
+        // Z is the rotation axis: z stays, x and y flip sign.
+        .z => .{ -self.x(), -self.y(), self.z() },
+    } };
+}
+
+```
+
+## Generated amd64 assembly
+Note: The procedure prelude/epilogue is omitted. Zig's calling convention is used, which is roughly equivalent in effect to a C function marked `static`.
+
+Note: This is for vectors stored packed for use with SSE; passing an array or separate scalars produces worse results, at least when not inlined.
+
+### rotateByHalfPiClockwise
+Notice how it's one instruction longer than the counter-clockwise case,
+so the choice of coordinate system affects the cost of rotating in a particular direction.
+
+```asm
+        vmovlpd qword ptr [rsp], xmm0
+        vmovshdup       xmm1, xmm0
+        vpbroadcastd    xmm2, dword ptr [rip + .LCPI2_0]
+        vpxor   xmm1, xmm1, xmm2
+        vbroadcastss    xmm0, xmm0
+        vblendps        xmm0, xmm0, xmm1, 1
+```
+### rotateByHalfPiCounterClockwise
+
+```asm
+        vmovlpd qword ptr [rsp], xmm0
+        vpbroadcastd    xmm1, dword ptr [rip + .LCPI1_0]
+        vpxor   xmm1, xmm0, xmm1
+        vmovshdup       xmm0, xmm0
+        vinsertps       xmm0, xmm0, xmm1, 16
+```
+
+### rotateByPi
+```asm
+        vmovlpd qword ptr [rsp], xmm0
+        vpermilps       xmm0, xmm0, 212
+        vpbroadcastd    xmm1, dword ptr [rip + .LCPI3_0]
+        vpxor   xmm0, xmm0, xmm1
+```
+
+### rotateByHalfPiClockwiseAroundAxis (X)
+```asm
+        sub     rsp, 24
+        vmovq   qword ptr [rsp], xmm0
+        vpermilpd       xmm1, xmm0, 1
+        vmovaps xmm2, xmm1
+        vmovss  dword ptr [rsp + 8], xmm2
+        vpbroadcastd    xmm2, dword ptr [rip + .LCPI4_0]
+        vpxor   xmm1, xmm1, xmm2
+        vpermilps       xmm0, xmm0, 212
+        vinsertps       xmm0, xmm0, xmm1, 16
+        add     rsp, 24
+```
+
+### rotateByHalfPiCounterClockwiseAroundAxis (X)
+Again, one instruction shorter.
+
+```asm
+        sub     rsp, 24
+        vextractps      dword ptr [rsp + 8], xmm0, 2
+        vmovq   qword ptr [rsp], xmm0
+        vmovshdup       xmm1, xmm0
+        vpbroadcastd    xmm2, dword ptr [rip + .LCPI5_0]
+        vpxor   xmm1, xmm1, xmm2
+        vpermilps       xmm0, xmm0, 232
+        vinsertps       xmm0, xmm0, xmm1, 32
+        add     rsp, 24
+```
+
+### rotateByPiAroundAxis (X)
+Now it's more work.
+
+```asm
+        sub     rsp, 24
+        vmovq   qword ptr [rsp], xmm0
+        vpermilpd       xmm1, xmm0, 1
+        vmovaps xmm2, xmm1
+        vmovss  dword ptr [rsp + 8], xmm2
+        vmovshdup       xmm2, xmm0
+        vbroadcastss    xmm3, dword ptr [rip + .LCPI6_0]
+        vpxor   xmm2, xmm2, xmm3
+        vpxor   xmm1, xmm1, xmm3
+        vinsertps       xmm0, xmm0, xmm2, 16
+        vinsertps       xmm0, xmm0, xmm1, 32
+        add     rsp, 24
+```
+