restructuring
This commit is contained in:
parent
bd19a43c0d
commit
d97dcff494
@ -1,126 +1 @@
|
||||
// todo: Use r12, r13, ... instead? They're preserved in Sys V abi which might make it more confortable,
|
||||
// but they might increase binary size, gotta test.
|
||||
|
||||
// todo: Try using something else that has lesser opcode size.
|
||||
// Execution thread convention:
|
||||
// r12 <- binary thread pointer
|
||||
// r13 <- return stack pointer
|
||||
|
||||
// Resources used:
|
||||
// https://mort.coffee/home/fast-interpreters/
|
||||
// https://blog.reverberate.org/2021/04/21/musttail-efficient-interpreters.html
|
||||
// https://en.wikibooks.org/wiki/X86_Assembly/GNU_assembly_syntax
|
||||
// https://www.cs.princeton.edu/courses/archive/spr18/cos217/lectures/15_AssemblyFunctions.pdf
|
||||
// https://ziglang.org/documentation/master/#toc-Assembly
|
||||
// https://csiflabs.cs.ucdavis.edu/~ssdavis/50/att-syntax.htm
|
||||
|
||||
pub const Word = u64;
|
||||
pub const RecursionLimit = 1024;
|
||||
|
||||
threadlocal var return_stack: [RecursionLimit + 1]Word = undefined;
|
||||
|
||||
// todo: Variant that pushes array of words.
|
||||
/// (iw | -- iw)
|
||||
pub fn opPushWord() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ add $0x10, %%r12
|
||||
\\ pushq -8(%%r12)
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
// todo: Variant that discards array of words.
|
||||
/// (w --)
|
||||
pub fn opSinkWord() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ add $0x08, %%r12
|
||||
\\ addq $0x08, %%rsp
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
/// (iw | -- (iw'nth word from stack) )
|
||||
// fn opTakeWord(binary: [*]const Word, cond: bool) noreturn {
|
||||
// @setRuntimeSafety(false);
|
||||
// takeWord(binary[1].word);
|
||||
// @call(.always_tail, binary[2].function, .{ &binary[2], cond });
|
||||
// }
|
||||
|
||||
/// (iw | w)
|
||||
// fn opSetWord(binary: [*]const Word, cond: bool) noreturn {
|
||||
// @setRuntimeSafety(false);
|
||||
// setWord(binary[1].word, popWord());
|
||||
// @call(.always_tail, binary[2].function, .{ &binary[2], cond });
|
||||
// }
|
||||
|
||||
// todo: Generate operation permutations procedurally.
|
||||
// todo: Jump on overflow instead of cond setting?
|
||||
/// (w1 w2 -- sum overflow)
|
||||
pub fn opSumWordsWithOverflow() callconv(.Naked) noreturn {
|
||||
// https://www.felixcloutier.com/x86/adc
|
||||
// https://www.felixcloutier.com/x86/setcc
|
||||
// idea: Could https://www.felixcloutier.com/x86/cmovcc be better for overflow push?
|
||||
asm volatile (
|
||||
\\ addq $0x08, %%r12
|
||||
\\ movq (%%rsp), %%rax
|
||||
\\ adcq 8(%%rsp), %%rax
|
||||
\\ movq %%rax, 8(%%rsp)
|
||||
\\ setc %%al
|
||||
\\ movb %%al, (%%rsp)
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
// todo: Generate operation permutations procedurally.
|
||||
// todo: We might not need cond register if conditions and jumps are combined?
|
||||
/// (w1 w2)
|
||||
// fn opRelativeJumpIfGreaterThan(binary: [*]const Word, cond: bool) noreturn {
|
||||
// @setRuntimeSafety(false);
|
||||
// const offset = if (popWord() > popWord()) binary[1].word else 2;
|
||||
// @call(.always_tail, binary[offset].function, .{ &binary[offset], cond });
|
||||
// }
|
||||
|
||||
// todo: Complex call op that would receive immediate mask that would tell
|
||||
// which positions of stack to duplicate, as well as mixing of plain immediate operands.
|
||||
// Or we could decouple it from call, it might be useful at other places.
|
||||
/// (iw |)
|
||||
pub fn opCall() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ leaq 16(%%r12), %%rax
|
||||
\\ subq $0x8, %%r13
|
||||
\\ movq %%rax, (%%r13)
|
||||
\\ movq 8(%%r12), %%r12
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
/// (addr)
|
||||
pub fn opReturn() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ movq (%%r13), %%r12
|
||||
\\ addq $0x08, %%r13
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
// todo: Make sure it's non reentry in one given thread.
|
||||
pub fn execute(binary: []const Word, entry_addr: usize) void {
|
||||
@setCold(true);
|
||||
// todo: Ensure correctness.
|
||||
// https://wiki.osdev.org/System_V_ABI
|
||||
|
||||
// todo: Use remaining stack as return.
|
||||
|
||||
// Such device is used so that opReturn could be used for return.
|
||||
asm volatile (
|
||||
\\ movq $0f, 8(%%r13)
|
||||
\\ leaq 8(%%r13), %%rax
|
||||
\\ movq %%rax, (%%r13)
|
||||
\\ jmpq *(%%r12)
|
||||
\\ 0:
|
||||
:
|
||||
: [thread] "r" (&binary[entry_addr]),
|
||||
[retstk] "r" (&return_stack[return_stack.len - 2]),
|
||||
: "rflags", "rax", "rbx", "rsp", "rdi", "rbp", "r14", "r15", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "memory"
|
||||
);
|
||||
}
|
||||
pub usingnamespace @import("x86-64/jedino-jedro.zig");
|
||||
|
142
src/arch/x86-64/jedino-jedro.zig
Normal file
142
src/arch/x86-64/jedino-jedro.zig
Normal file
@ -0,0 +1,142 @@
|
||||
//! jedino jedro (.jj:x86-64)
|
||||
//!
|
||||
//! Desired properties:
|
||||
//! - OS agnosticism, meaning it tries to respect conventions posed by target OSes.
|
||||
//! For extensions based open it there should be an enum value indicating host,
|
||||
//! for example, when dealing with extern C functions of shared objects.
|
||||
//! Stack/thread pointers are chosen so that SysV and MS abis callee side preserve those,
|
||||
//! so that we don't need to constantly push and restore on procedure call.
|
||||
|
||||
// todo: Use r12, r13, ... instead? They're preserved in Sys V abi which might make it more confortable,
|
||||
// but they might increase binary size, gotta test.
|
||||
|
||||
// todo: Try using something else that has lesser opcode size.
|
||||
// Execution thread convention:
|
||||
// r12 <- binary thread pointer
|
||||
// r13 <- return stack pointer
|
||||
|
||||
// Resources used:
|
||||
// https://mort.coffee/home/fast-interpreters/
|
||||
// https://blog.reverberate.org/2021/04/21/musttail-efficient-interpreters.html
|
||||
// https://en.wikibooks.org/wiki/X86_Assembly/GNU_assembly_syntax
|
||||
// https://www.cs.princeton.edu/courses/archive/spr18/cos217/lectures/15_AssemblyFunctions.pdf
|
||||
// https://ziglang.org/documentation/master/#toc-Assembly
|
||||
// https://csiflabs.cs.ucdavis.edu/~ssdavis/50/att-syntax.htm
|
||||
|
||||
// Neat things:
|
||||
// https://joryanick.com/retro-fast-x86-memcpy.php
|
||||
// https://www.codeproject.com/Articles/1110153/Apex-memmove-the-fastest-memcpy-memmove-on-x-x-EVE
|
||||
|
||||
const int = @import("../../interpreter.zig");
|
||||
const Word = int.Word;
|
||||
pub const RecursionLimit = int.RecursionLimit;
|
||||
|
||||
threadlocal var return_stack: [RecursionLimit + 1]Word = undefined;
|
||||
|
||||
// todo: Variant that pushes array of words.
|
||||
/// (iw | -- iw)
|
||||
pub fn opPushWord() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ add $0x10, %%r12
|
||||
\\ pushq -8(%%r12)
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
// todo: Variant that discards array of words.
|
||||
/// (w)
|
||||
pub fn opSinkWord() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ add $0x08, %%r12
|
||||
\\ addq $0x08, %%rsp
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
/// (iw | -- (iw'nth word from stack) )
|
||||
// fn opTakeWord(binary: [*]const Word, cond: bool) noreturn {
|
||||
// @setRuntimeSafety(false);
|
||||
// takeWord(binary[1].word);
|
||||
// @call(.always_tail, binary[2].function, .{ &binary[2], cond });
|
||||
// }
|
||||
|
||||
/// (iw | w)
|
||||
// fn opSetWord(binary: [*]const Word, cond: bool) noreturn {
|
||||
// @setRuntimeSafety(false);
|
||||
// setWord(binary[1].word, popWord());
|
||||
// @call(.always_tail, binary[2].function, .{ &binary[2], cond });
|
||||
// }
|
||||
|
||||
// todo: Generate operation permutations procedurally.
|
||||
// todo: Jump on overflow instead of cond setting?
|
||||
/// (w1 w2 -- sum overflow)
|
||||
pub fn opSumWordsWithOverflow() callconv(.Naked) noreturn {
|
||||
// https://www.felixcloutier.com/x86/adc
|
||||
// https://www.felixcloutier.com/x86/setcc
|
||||
// idea: Could https://www.felixcloutier.com/x86/cmovcc be better for overflow push?
|
||||
asm volatile (
|
||||
\\ addq $0x08, %%r12
|
||||
\\ movq (%%rsp), %%rax
|
||||
\\ adcq 8(%%rsp), %%rax
|
||||
\\ movq %%rax, 8(%%rsp)
|
||||
\\ setc %%al
|
||||
\\ movb %%al, (%%rsp)
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
// todo: Generate operation permutations procedurally.
|
||||
// todo: We might not need cond register if conditions and jumps are combined?
|
||||
/// (w1 w2)
|
||||
// fn opRelativeJumpIfGreaterThan(binary: [*]const Word, cond: bool) noreturn {
|
||||
// @setRuntimeSafety(false);
|
||||
// const offset = if (popWord() > popWord()) binary[1].word else 2;
|
||||
// @call(.always_tail, binary[offset].function, .{ &binary[offset], cond });
|
||||
// }
|
||||
|
||||
// todo: Complex call op that would receive immediate mask that would tell
|
||||
// which positions of stack to duplicate, as well as mixing of plain immediate operands.
|
||||
// Or we could decouple it from call, it might be useful at other places.
|
||||
/// (iw |)
|
||||
pub fn opCall() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ leaq 16(%%r12), %%rax
|
||||
\\ subq $0x8, %%r13
|
||||
\\ movq %%rax, (%%r13)
|
||||
\\ movq 8(%%r12), %%r12
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
/// (addr)
|
||||
pub fn opReturn() callconv(.Naked) noreturn {
|
||||
asm volatile (
|
||||
\\ movq (%%r13), %%r12
|
||||
\\ addq $0x08, %%r13
|
||||
\\ jmpq *(%%r12)
|
||||
);
|
||||
}
|
||||
|
||||
// todo: Make sure it's non reentry in one given thread.
|
||||
// todo: Allow passing initial stack via array of words.
|
||||
// todo: Ensure correctness.
|
||||
// todo: Use remaining stack as return.
|
||||
// todo: Make it .C callconv and extern.
|
||||
// todo: Permute by calling conventions.
|
||||
pub fn execute(binary: []const Word, entry_addr: usize) void {
|
||||
// https://wiki.osdev.org/System_V_ABI
|
||||
@setCold(true);
|
||||
|
||||
// Such device is used so that opReturn could be used for return.
|
||||
asm volatile (
|
||||
\\ movq $0f, 8(%%r13)
|
||||
\\ leaq 8(%%r13), %%rax
|
||||
\\ movq %%rax, (%%r13)
|
||||
\\ jmpq *(%%r12)
|
||||
\\ 0:
|
||||
:
|
||||
: [thread] "r" (&binary[entry_addr]),
|
||||
[retstk] "r" (&return_stack[return_stack.len - 2]),
|
||||
: "rflags", "rax", "rbx", "rsp", "rdi", "rbp", "r14", "r15", "rsi", "rdx", "rcx", "r8", "r9", "r10", "r11", "memory"
|
||||
);
|
||||
}
|
@ -14,4 +14,7 @@
|
||||
// idea: 'JIT' could be done by simple op* compiled binary copying up until `jmpq *(%%rdi)`,
|
||||
// with immediate operand prelude modified, which could be done procedurally.
|
||||
|
||||
pub const Word = u64;
|
||||
pub const RecursionLimit = 1024;
|
||||
|
||||
pub usingnamespace @import("arch/x86-64.zig");
|
||||
|
Loading…
Reference in New Issue
Block a user