sysv zov generation for low overhead ffi
This commit is contained in:
parent
0d5bd46412
commit
b66c3dca14
2
gdb.sh
2
gdb.sh
@ -1,3 +1,3 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
gdb ./zig-out/bin/nmvm -ex 'b arch.x86-64.execute' -ex 'layout asm' -ex 'r'
|
gdb ./zig-out/bin/nmvm -ex 'b arch.x86-64.jedino-jedro.execute' -ex 'layout asm' -ex 'r'
|
||||||
|
@ -1 +1,2 @@
|
|||||||
pub usingnamespace @import("x86-64/jedino-jedro.zig");
|
pub usingnamespace @import("x86-64/jedino-jedro.zig");
|
||||||
|
pub usingnamespace @import("x86-64/ve-sistema.zig");
|
||||||
|
@ -7,29 +7,29 @@
|
|||||||
//! Stack/thread pointers are chosen so that SysV and MS abis callee side preserve those,
|
//! Stack/thread pointers are chosen so that SysV and MS abis callee side preserve those,
|
||||||
//! so that we don't need to constantly push and restore on procedure call.
|
//! so that we don't need to constantly push and restore on procedure call.
|
||||||
|
|
||||||
// todo: Use r12, r13, ... instead? They're preserved in Sys V abi which might make it more comfortable,
|
|
||||||
// but they might increase binary size, gotta test.
|
|
||||||
|
|
||||||
// todo: Try using something else that has lesser opcode size.
|
|
||||||
// Execution thread convention:
|
// Execution thread convention:
|
||||||
// r12 <- binary thread pointer
|
// r12 <- binary thread pointer
|
||||||
// r13 <- return stack pointer
|
// r13 <- return stack pointer
|
||||||
|
// r14 <- extension context pointer
|
||||||
|
|
||||||
// Resources used:
|
// todo: Use ZF flag as conditional register so as not to involve the stack?
|
||||||
|
// Alternatively we could keep boolean word, but implement it in vector semantics.
|
||||||
|
|
||||||
|
// Resources:
|
||||||
// https://mort.coffee/home/fast-interpreters/
|
// https://mort.coffee/home/fast-interpreters/
|
||||||
// https://blog.reverberate.org/2021/04/21/musttail-efficient-interpreters.html
|
// https://blog.reverberate.org/2021/04/21/musttail-efficient-interpreters.html
|
||||||
// https://en.wikibooks.org/wiki/X86_Assembly/GNU_assembly_syntax
|
// https://en.wikibooks.org/wiki/X86_Assembly/GNU_assembly_syntax
|
||||||
// https://www.cs.princeton.edu/courses/archive/spr18/cos217/lectures/15_AssemblyFunctions.pdf
|
// https://www.cs.princeton.edu/courses/archive/spr18/cos217/lectures/15_AssemblyFunctions.pdf
|
||||||
// https://ziglang.org/documentation/master/#toc-Assembly
|
// https://ziglang.org/documentation/master/#toc-Assembly
|
||||||
// https://csiflabs.cs.ucdavis.edu/~ssdavis/50/att-syntax.htm
|
// https://csiflabs.cs.ucdavis.edu/~ssdavis/50/att-syntax.htm
|
||||||
|
// https://stackoverflow.com/questions/37639993/is-this-assembly-function-call-safe-complete
|
||||||
|
|
||||||
// Neat things:
|
// Neat things:
|
||||||
// https://joryanick.com/retro-fast-x86-memcpy.php
|
// https://joryanick.com/retro-fast-x86-memcpy.php
|
||||||
// https://www.codeproject.com/Articles/1110153/Apex-memmove-the-fastest-memcpy-memmove-on-x-x-EVE
|
// https://www.codeproject.com/Articles/1110153/Apex-memmove-the-fastest-memcpy-memmove-on-x-x-EVE
|
||||||
|
|
||||||
const int = @import("../../interpreter.zig");
|
const tolmac = @import("../../tolmac.zig");
|
||||||
const Word = int.Word;
|
const Word = tolmac.Word;
|
||||||
pub const RecursionLimit = int.RecursionLimit;
|
|
||||||
|
|
||||||
// todo: Variant that pushes array of words.
|
// todo: Variant that pushes array of words.
|
||||||
/// (iw | -- iw)
|
/// (iw | -- iw)
|
||||||
@ -124,18 +124,29 @@ pub fn execute(binary: []const Word, entry_addr: usize) void {
|
|||||||
// https://wiki.osdev.org/System_V_ABI
|
// https://wiki.osdev.org/System_V_ABI
|
||||||
@setCold(true);
|
@setCold(true);
|
||||||
|
|
||||||
var return_stack: [RecursionLimit + 1]Word = undefined;
|
var return_stack: [tolmac.RecursionLimit + 1]Word = undefined;
|
||||||
|
|
||||||
// Such device is used so that opReturn could be used for return.
|
jumpstartSysV(&binary[entry_addr], &return_stack[return_stack.len - 2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
const jumpstartSysV = @as(*const fn (thread: *const Word, return_stack: *Word) callconv(.SysV) void, @ptrCast(&jumpstartNakedSysV));
|
||||||
|
|
||||||
|
fn jumpstartNakedSysV() callconv(.Naked) void {
|
||||||
asm volatile (
|
asm volatile (
|
||||||
|
\\ pushq %%rbp
|
||||||
|
\\ movq %%rsp, %%rbp
|
||||||
|
\\
|
||||||
|
\\ movq %%rdi, %%r12
|
||||||
|
\\ movq %%rsi, %%r13
|
||||||
|
\\
|
||||||
|
\\ # Such device is used so that opReturn could be used for return.
|
||||||
\\ movq $0f, 8(%%r13)
|
\\ movq $0f, 8(%%r13)
|
||||||
\\ leaq 8(%%r13), %%rax
|
\\ leaq 8(%%r13), %%rax
|
||||||
\\ movq %%rax, (%%r13)
|
\\ movq %%rax, (%%r13)
|
||||||
\\ jmpq *(%%r12)
|
\\ jmpq *(%%r12)
|
||||||
\\ 0:
|
\\ 0:
|
||||||
:
|
\\
|
||||||
: [thread] "r" (&binary[entry_addr]),
|
\\ popq %%rbp
|
||||||
[retstk] "r" (&return_stack[return_stack.len - 2]),
|
\\ ret
|
||||||
: "rflags", "rax", "rbx", "rsp", "rdi", "rbp", "r14", "r15", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "memory"
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
162
src/arch/x86-64/ve-sistema.zig
Normal file
162
src/arch/x86-64/ve-sistema.zig
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
//! ve sistema (.ve-sistema:x86-64)
|
||||||
|
//!
|
||||||
|
//! Provides entry opcodes for System V calling convention, optimized for specific prototypes.
|
||||||
|
//!
|
||||||
|
|
||||||
|
// https://refspecs.linuxbase.org/elf/x86_64-abi-0.99.pdf
|
||||||
|
|
||||||
|
const std = @import("std");
|
||||||
|
|
||||||
|
/// Used for stack parameter passing.
|
||||||
|
pub const WordLimit = 128;
|
||||||
|
const AsmBufferLimit = 4096;
|
||||||
|
const ClassBufferLimit = 256;
|
||||||
|
|
||||||
|
/// Parameter class assigned to one eightbyte of a value when classifying it
/// for the System V x86-64 calling convention.
///
/// The declaration order is significant only in that it fixes the tag values;
/// do not reorder.
const Class = enum {
    /// Denotes empty types.
    void,
    /// Eightbyte travels in a general purpose register.
    integer,
    /// Eightbyte travels in a vector register.
    sse,
    /// Upper part of a value whose lower part is `sse`.
    sseup,
    /// Eightbyte belongs to an x87 value.
    x87,
    /// Upper part of a value whose lower part is `x87`.
    x87up,
    /// Initial class of the classification algorithm (padding and empty aggregates).
    no_class,
    /// Value is passed in memory on the stack.
    memory,
};
|
||||||
|
|
||||||
|
/// Classifies `T` into parameter classes, one `Class` per eightbyte of `T`.
///
/// The classes are written to the front of `buffer` and the written prefix is
/// returned, so the result aliases `buffer` and is only valid until `buffer`
/// is reused. `buffer` must have room for every eightbyte of `T`.
///
/// Only void, integers and floats of up to 64 bits, bools, pointers, slices
/// and function types are handled; anything else is a compile error.
/// A zero-sized `T` yields a single `.void` class.
fn determiteClass(comptime T: type, buffer: []Class) []Class {
    switch (@typeInfo(T)) {
        // This arm used to evaluate to a discarded `&[1]Class{.void}`, which
        // neither compiled as a statement nor filled `buffer`.
        .Void => buffer[0] = .void,
        .Int => |int| switch (int.bits) {
            0 => buffer[0] = .void,
            1...64 => buffer[0] = .integer,
            // Wider integers span several eightbytes; not supported yet.
            else => @compileError("unimplemented"),
        },
        .Float => |float| switch (float.bits) {
            0 => buffer[0] = .void,
            1...64 => buffer[0] = .sse,
            // 80-bit (x87) and 128-bit floats are not supported yet.
            else => @compileError("unimplemented"),
        },
        .Bool => buffer[0] = .integer,
        .Pointer => |ptr| switch (ptr.size) {
            // A slice occupies two eightbytes, both classified as integer.
            .Slice => {
                buffer[0] = .integer;
                buffer[1] = .integer;
            },
            else => buffer[0] = .integer,
        },
        .Fn => buffer[0] = .integer,
        else => @compileError("unimplemented"),
    }

    // Round the size up to whole eightbytes. A zero-sized type still reports
    // its single `.void` class; the previous `(@sizeOf(T) - 1) / 8 + 1`
    // underflowed for such types.
    const eightbyte_count = if (@sizeOf(T) == 0) 1 else (@sizeOf(T) + 7) / 8;
    return buffer[0..eightbyte_count];
}
|
||||||
|
|
||||||
|
// todo: Make sure duplicates are not made.
|
||||||
|
// todo: Cache results for identical in effect devices.
|
||||||
|
//
|
||||||
|
/// (iw | -- (arbitrary amount of words))
///
/// Builds, at compile time, a naked op that calls a System V function whose
/// signature matches `prototype`.
///
/// The generated assembly:
///   1. saves `%rsp` in `%rbp`,
///   2. loads each parameter eightbyte from `offset(%rbp)` into the integer
///      argument registers (`rdi`, `rsi`, `rdx`, `rcx`, `r8`, `r9`), with the
///      first parameter read from the highest offset,
///   3. pushes the remaining eightbytes right-to-left, as the ABI requires for
///      arguments passed in memory,
///   4. calls the function pointer stored in the word following the op
///      (`call *8(%r12)`),
///   5. restores `%rsp`, advances `%r12` by two words and jumps to the next op.
///
/// Only `.integer` and `.void` classes are supported; any other class is a
/// compile error. Returns an error if the assembly does not fit in
/// `AsmBufferLimit` bytes. At most `WordLimit` words may be passed on the stack.
pub fn generateOpZovSysvFromPrototype(prototype: anytype) !*const fn () callconv(.Naked) noreturn {
    // todo: Should we care about this?
    //       > The direction flag DF in the %rFLAGS register must be clear (set to “forward”
    //       > direction) on function entry and return.

    comptime {
        const func = @typeInfo(@TypeOf(prototype)).Fn;

        var source_buffer = [_]u8{0} ** AsmBufferLimit;
        var source_needle: usize = 0;

        // todo: Align callee frame to 16?
        //       > shrq $4, %%rsp
        //       > addq $1, %%rsp
        //       > shlq $4, %%rsp

        // idea: Try using REP for big consecutive memory pushes.

        // todo: In-stack returns by pointing %rdi directly to final destination.

        const Prelude =
            \\ movq %%rsp, %%rbp # Move stack pointer in non-volatile %rbp to restore later
            \\ subq $0x8, %%rsp
            \\
        ;

        const Call =
            \\ call *8(%%r12)
            \\
        ;

        const Epilogue =
            \\ movq %%rbp, %%rsp # Restore stack pointer
            \\ addq $0x10, %%r12
            \\ jmpq *(%%r12)
            \\
        ;

        @memcpy(source_buffer[source_needle .. source_needle + Prelude.len], Prelude[0..]);
        source_needle += Prelude.len;

        var integer_allocation: usize = 0;
        // The trailing "stack" entry is a sentinel: once the six registers are
        // taken, further integer eightbytes go to the stack.
        const IntegerAllocations = [_][]const u8{ "rdi", "rsi", "rdx", "rcx", "r8", "r9", "stack" };
        // var sse_allocation: enum { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, stack } = .xmm0;

        // Scratch space reused by every `determiteClass` call below.
        var class_buffer = [_]Class{.void} ** ClassBufferLimit;

        // Offsets of eightbytes that did not fit in registers, in parameter
        // order. They are emitted in reverse after the loop.
        var stack_argument_offsets = [_]usize{0} ** WordLimit;
        var stack_argument_count: usize = 0;

        // Calculate stack space used by parameters.
        var parameter_stack_size: usize = 0;
        for (func.params) |param| {
            const classes = determiteClass(param.type.?, &class_buffer);
            parameter_stack_size += 8 * classes.len;
        }

        // Move parameters to appropriate registers and record stack positions.
        var stack_offset: usize = parameter_stack_size;
        for (func.params) |param| {
            const classes = determiteClass(param.type.?, &class_buffer);
            for (classes) |class| {
                stack_offset -= 8;
                switch (class) {
                    .integer => {
                        if (integer_allocation < IntegerAllocations.len - 1) {
                            source_needle += (try std.fmt.bufPrint(
                                source_buffer[source_needle..],
                                "movq {}(%%rbp), %%{s}\n",
                                .{ stack_offset, IntegerAllocations[integer_allocation] },
                            )).len;
                            integer_allocation += 1;
                        } else {
                            if (stack_argument_count == WordLimit) {
                                @compileError("too many stack-passed words");
                            }
                            stack_argument_offsets[stack_argument_count] = stack_offset;
                            stack_argument_count += 1;
                        }
                    },
                    .void => {},
                    else => @compileError("unimplemented"),
                }
            }
        }

        // Arguments passed in memory must be pushed right-to-left so that the
        // first of them ends up at the lowest address. Pushing in parameter
        // order (as was done before) hands them to the callee reversed.
        var remaining = stack_argument_count;
        while (remaining > 0) {
            remaining -= 1;
            source_needle += (try std.fmt.bufPrint(
                source_buffer[source_needle..],
                "pushq {}(%%rbp)\n",
                .{stack_argument_offsets[remaining]},
            )).len;
        }

        @memcpy(source_buffer[source_needle .. source_needle + Call.len], Call[0..]);
        source_needle += Call.len;

        @memcpy(source_buffer[source_needle .. source_needle + Epilogue.len], Epilogue[0..]);
        source_needle += Epilogue.len;

        return &struct {
            fn op() callconv(.Naked) noreturn {
                asm volatile (source_buffer[0..source_needle]);
            }
        }.op;
    }
}
|
@ -1,20 +0,0 @@
|
|||||||
// todo: Interpreter context as binary local variable.
|
|
||||||
// It would hold memory mappings, as well as error stack.
|
|
||||||
// todo: Define procedure call for user code.
|
|
||||||
// todo: Instruction set extensions, such as memory management schemes, non-exhaustive logging,
|
|
||||||
// exception mechanism, coroutines via yield/resume and etc.
|
|
||||||
// todo: Threading scheme.
|
|
||||||
// todo: Extension for native floating point stack ops.
|
|
||||||
// todo: Try using small code model with nopie/nopic binary.
|
|
||||||
|
|
||||||
// idea: Specialized opcodes that have side effects on read and write, such as
|
|
||||||
// zero-check on push/pop, or jump if condition bit met. This would create a lot
|
|
||||||
// of permutations tho, we might try to discover which code devices are most used.
|
|
||||||
|
|
||||||
// idea: 'JIT' could be done by simple op* compiled binary copying up until `jmpq *(%%rdi)`,
|
|
||||||
// with immediate operand prelude modified, which could be done procedurally.
|
|
||||||
|
|
||||||
pub const Word = u64;
|
|
||||||
pub const RecursionLimit = 1024;
|
|
||||||
|
|
||||||
pub usingnamespace @import("arch/x86-64.zig");
|
|
42
src/main.zig
42
src/main.zig
@ -1,23 +1,39 @@
|
|||||||
const int = @import("interpreter.zig");
|
const std = @import("std");
|
||||||
|
const tolmac = @import("tolmac.zig");
|
||||||
|
|
||||||
|
/// Demo System V callee: prints its three integer arguments to stderr.
/// Invoked from threaded code through the op generated from its prototype.
fn printInt(int: u64, other: u32, another: u16) callconv(.SysV) void {
    // Request a 16-byte aligned stack frame for this function.
    @setAlignStack(16);

    const values = .{ int, other, another };
    std.debug.print("test: {}, {}, {}\n", values);
}
|
||||||
|
|
||||||
|
const opPrintIntZov = tolmac.generateOpZovSysvFromPrototype(printInt) catch unreachable;
|
||||||
|
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
// todo: Mixing return addresses in stack poses a challenge, hm.
|
// todo: Mixing return addresses in stack poses a challenge, hm.
|
||||||
const add = [_]int.Word{
|
const add = [_]tolmac.Word{
|
||||||
@as(int.Word, @intFromPtr(&int.opSumWordsWithOverflow)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opSumWordsWithOverflow)),
|
||||||
@as(int.Word, @intFromPtr(&int.opReturn)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opReturn)),
|
||||||
};
|
};
|
||||||
|
|
||||||
const entry = [_]int.Word{
|
const entry = [_]tolmac.Word{
|
||||||
@as(int.Word, @intFromPtr(&int.opPushWord)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opPushWord)),
|
||||||
1,
|
1,
|
||||||
@as(int.Word, @intFromPtr(&int.opPushWord)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opPushWord)),
|
||||||
2,
|
2,
|
||||||
@as(int.Word, @intFromPtr(&int.opCall)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opCall)),
|
||||||
@as(int.Word, @intFromPtr(&add)),
|
@as(tolmac.Word, @intFromPtr(&add)),
|
||||||
@as(int.Word, @intFromPtr(&int.opSinkWord)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opSinkWord)),
|
||||||
@as(int.Word, @intFromPtr(&int.opSinkWord)),
|
@as(tolmac.Word, @intFromPtr(&tolmac.opPushWord)),
|
||||||
@as(int.Word, @intFromPtr(&int.opReturn)),
|
10,
|
||||||
|
@as(tolmac.Word, @intFromPtr(&tolmac.opPushWord)),
|
||||||
|
20,
|
||||||
|
@as(tolmac.Word, @intFromPtr(opPrintIntZov)),
|
||||||
|
@as(tolmac.Word, @intFromPtr(&printInt)),
|
||||||
|
@as(tolmac.Word, @intFromPtr(&tolmac.opSinkWord)),
|
||||||
|
@as(tolmac.Word, @intFromPtr(&tolmac.opSinkWord)),
|
||||||
|
@as(tolmac.Word, @intFromPtr(&tolmac.opSinkWord)),
|
||||||
|
@as(tolmac.Word, @intFromPtr(&tolmac.opReturn)),
|
||||||
};
|
};
|
||||||
|
|
||||||
int.execute(&entry, 0);
|
tolmac.execute(&entry, 0);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user