From e3e68154405a2dea1b5899a3c24d436170dd107b Mon Sep 17 00:00:00 2001 From: liushuyu Date: Mon, 24 Jun 2024 09:50:02 -0600 Subject: [PATCH 01/11] druntime/rt/sections_elf_shared.d: add support for SystemZ (S390x) --- gen/target.cpp | 7 +++- runtime/druntime/src/core/thread/osthread.d | 19 +++++++++++ runtime/druntime/src/rt/dwarfeh.d | 5 +++ runtime/druntime/src/rt/sections_elf_shared.d | 33 +++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/gen/target.cpp b/gen/target.cpp index 794bc5b4d63..0cab94a215c 100644 --- a/gen/target.cpp +++ b/gen/target.cpp @@ -83,6 +83,7 @@ llvm::Type *getRealType(const llvm::Triple &triple) { case Triple::riscv32: case Triple::riscv64: + case Triple::systemz: #if LDC_LLVM_VER >= 1600 case Triple::loongarch32: case Triple::loongarch64: @@ -120,7 +121,6 @@ llvm::Type *getRealType(const llvm::Triple &triple) { default: // 64-bit double precision for all other targets - // FIXME: SystemZ, ... return LLType::getDoubleTy(ctx); } } @@ -324,6 +324,11 @@ const char *TargetCPP::typeMangle(Type *t) { } } + // `long double` on SystemZ is __float128 and mangled as `g` + if (triple.getArch() == llvm::Triple::systemz) { + return "g"; + } + return "e"; } diff --git a/runtime/druntime/src/core/thread/osthread.d b/runtime/druntime/src/core/thread/osthread.d index 8b07397296a..ba8872ffc52 100644 --- a/runtime/druntime/src/core/thread/osthread.d +++ b/runtime/druntime/src/core/thread/osthread.d @@ -1633,6 +1633,25 @@ in (fn) asm pure nothrow @nogc { ( "st.d $fp, %0") : "=m" (regs[17]); } asm pure nothrow @nogc { ( "st.d $sp, %0") : "=m" (sp); } } + else version (SystemZ) + { + size_t[19] regs = void; + asm pure nothrow @nogc { + // save argument/return register + "stg %%r2, %0" : "=m" (regs[0]); + // save callee-saved GPRs (%r6 - %r14) + "stmg %%r6, %%r14, %0" : "=m" (regs[1]); + // save floating point control register + "stfpc %0" : "=m" (regs[10]); + } + static foreach (i; 8 .. 
16) + {{ + enum int j = i; + // save %f8 - %f15 + asm pure nothrow @nogc { ( "std %%f"~j.stringof~", %0") : "=m" (regs[i + 3]); } + }} + asm pure nothrow @nogc { ( "stg %%r15, %0") : "=m" (sp); } + } else { static assert(false, "Architecture not supported."); diff --git a/runtime/druntime/src/rt/dwarfeh.d b/runtime/druntime/src/rt/dwarfeh.d index 27023fac445..e950d7ae059 100644 --- a/runtime/druntime/src/rt/dwarfeh.d +++ b/runtime/druntime/src/rt/dwarfeh.d @@ -99,6 +99,11 @@ else version (LoongArch64) enum eh_exception_regno = 4; enum eh_selector_regno = 5; } +else version (SystemZ) +{ + enum eh_exception_regno = 6; + enum eh_selector_regno = 7; +} else { static assert(0, "Unknown EH register numbers for this architecture"); diff --git a/runtime/druntime/src/rt/sections_elf_shared.d b/runtime/druntime/src/rt/sections_elf_shared.d index ddccf7fbc3f..8db8450c6cd 100644 --- a/runtime/druntime/src/rt/sections_elf_shared.d +++ b/runtime/druntime/src/rt/sections_elf_shared.d @@ -40,6 +40,7 @@ version (MIPS32) version = MIPS_Any; version (MIPS64) version = MIPS_Any; version (RISCV32) version = RISCV_Any; version (RISCV64) version = RISCV_Any; +version (SystemZ) version = IBMZ_Any; // debug = PRINTF; import core.internal.elf.dl; @@ -1182,6 +1183,36 @@ version (LDC) extern(C) void* ___tls_get_addr(tls_index* ti) nothrow @nogc; alias __tls_get_addr = ___tls_get_addr; } + else version (IBMZ_Any) + { + import ldc.intrinsics; + /// __tls_get_offset (available since GLibc 2.3) returns the thread pointer offset + /// of the request object. + /// IBM Z does not expose the `__tls_get_addr` function like other architectures. 
+ extern(C) void* __tls_get_offset(size_t offset) nothrow @nogc; + // keep this function internal + private void* __tls_get_addr(tls_index* ti) nothrow @nogc + { + // adapted from GDC's assembler routine: libphobos/libdruntime/config/systemz/get_tls_offset.S + size_t got_offset = cast(size_t)ti; + version (SystemZ) { + // got_offset = &ti - &got (stored in r12) + asm pure nothrow @nogc { + "sgr %0, %%r12" : "=r" (got_offset) : "0" (got_offset); + } + } + else version (S390) + { + asm nothrow @nogc { + "larl %%r12, _GLOBAL_OFFSET_TABLE_" ::: "r12"; + "sr %0, %%r12" : "=r" (got_offset) : "0" (got_offset); + } + } + // the offset is relative to the thread pointer base + // we need to add that to get the final address + return __tls_get_offset(got_offset) + cast(size_t)llvm_thread_pointer(); + } + } else extern(C) void* __tls_get_addr(tls_index* ti) nothrow @nogc; } @@ -1218,6 +1249,8 @@ else version (MIPS_Any) enum TLS_DTV_OFFSET = 0x8000; else version (LoongArch64) enum TLS_DTV_OFFSET = 0x0; +else version (IBMZ_Any) + enum TLS_DTV_OFFSET = 0x0; else static assert( false, "Platform not supported." 
); From 685fa9e74ae6aea83770add84e603f221e0bce53 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 25 Jun 2024 09:21:16 -0600 Subject: [PATCH 02/11] druntime/core/thread/fiber: add fiber implementation for s390x --- runtime/druntime/src/core/thread/fiber.d | 38 ++++++++ runtime/druntime/src/core/threadasm.S | 107 +++++++++++++++++++++++ 2 files changed, 145 insertions(+) diff --git a/runtime/druntime/src/core/thread/fiber.d b/runtime/druntime/src/core/thread/fiber.d index f5d81a3c2c4..7fea4b6910e 100644 --- a/runtime/druntime/src/core/thread/fiber.d +++ b/runtime/druntime/src/core/thread/fiber.d @@ -173,6 +173,14 @@ private version = AlignFiberStackTo16Byte; } } + else version (SystemZ) + { + version (Posix) + { + version = AsmSystemZ_Posix; + version = AsmExternal; + } + } version (Posix) { @@ -575,6 +583,8 @@ version (LDC) version (PPC64) version = CheckFiberMigration; + version (SystemZ) version = CheckFiberMigration; + // Fiber migration across threads is (probably) not possible with ASan fakestack enabled (different parts of the stack // will contain fakestack pointers that were created on different threads...) version (SupportSanitizers) version = CheckFiberMigration; @@ -1903,6 +1913,34 @@ private: push(cast(size_t) &fiber_trampoline); // see threadasm.S for docs pstack += size_t.sizeof; // adjust sp (newp) above lr } + else version (AsmSystemZ_Posix) { + // Unlike a lot of architectures, s390x has a very special way + // to do function calls: by saving registers onto + // "register save area" (which is below the stack frame). + // However, we put fp registers on top of the "register save area" + // because saved fp registers are not part of this area. + // fiber_switchContext expects newp sp to look like this: + // 0: %f15 <-- newp tstack + // -1: %f13 + // -2: %f11 + // ... + // -8: %f6 (not saved) <-- top of register save area + // ... + // -13: %r14 [&fiber_entryPoint] + // ... 
+ // -25: %r2 (not saved) <-- bottom of the register save area + // -26: reserved + // -27: %r0 <-- backchain slot + + version (StackGrowsDown) {} + else + static assert(false, "Only full descending stacks supported on SystemZ"); + + push(cast(size_t) 0x0); // sp + push(cast(size_t) &fiber_entryPoint); // r14 (return address) + pstack -= size_t.sizeof * 22; // skip past space reserved for a lot of stuff + + } else version (AsmAArch64_Posix) { // Like others, FP registers and return address (lr) are kept diff --git a/runtime/druntime/src/core/threadasm.S b/runtime/druntime/src/core/threadasm.S index 77d00674974..08675e14650 100644 --- a/runtime/druntime/src/core/threadasm.S +++ b/runtime/druntime/src/core/threadasm.S @@ -705,6 +705,113 @@ CSYM(fiber_trampoline): // fiber_entryPoint never returns bl CSYM(fiber_entryPoint) .cfi_endproc + +#elif defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +/************************************************************************************ + * SystemZ (S390X) ASM BITS + ************************************************************************************/ + +// Note: Does not support S390 systems, only S390X (IBM Z) systems are covered. +/** + * Performs a context switch. 
+ * + * Parameters: + * r2 - void** - ptr to old stack pointer + * r3 - void* - new stack pointer + * + */ + +.text +.globl fiber_switchContext +.type fiber_switchContext, %function +fiber_switchContext: + .cfi_startproc + .machine "z10" + + /** + * Save the call-saved general purpose registers onto + * the caller register save-area (below the %sp per s390x ABI) + * (This includes return address %r14 and stack pointer %r15) */ + stmg %r6, %r15, 48(%r15) + .cfi_offset 6, -112 + .cfi_offset 7, -104 + .cfi_offset 8, -96 + .cfi_offset 9, -88 + .cfi_offset 10, -80 + .cfi_offset 11, -72 + .cfi_offset 12, -64 + .cfi_offset 13, -56 + .cfi_offset 14, -48 + .cfi_offset 15, -40 + lgr %r1, %r15 + aghi %r15, -64 + /* we need to tell the debugger that the current stack offset is: + 64 (frame size) + 160 (parameter area size) */ + .cfi_def_cfa_offset 224 + /* store the (optional) backchain data */ + stg %r1, 0(%r15) + /* Save callee-saved floating point registers + s390x ABI has a very unique way for storing fp registers: + even-pairs first and odd-pairs last */ + std %f8, 0(%r15) + .cfi_offset 24, -224 + std %f10, 8(%r15) + .cfi_offset 25, -216 + std %f12, 16(%r15) + .cfi_offset 26, -208 + std %f14, 24(%r15) + .cfi_offset 27, -200 + std %f9, 32(%r15) + .cfi_offset 28, -192 + std %f11, 40(%r15) + .cfi_offset 29, -184 + std %f13, 48(%r15) + .cfi_offset 30, -176 + std %f15, 56(%r15) + .cfi_offset 31, -168 + + /* Save stack pointer, the stack pointer is adjusted so that + GC won't see the floating point registers */ + stg %r15, 0(%r2) + + /* Load the new context pointer as stack pointer. */ + lgr %r15, %r3 + .cfi_def_cfa_offset 224 + + /* Restore call-saved floating point registers. 
*/ + ld %f8, 0(%r15) + .cfi_offset 24, -224 + ld %f10, 8(%r15) + .cfi_offset 25, -216 + ld %f12, 16(%r15) + .cfi_offset 26, -208 + ld %f14, 24(%r15) + .cfi_offset 27, -200 + ld %f9, 32(%r15) + .cfi_offset 28, -192 + ld %f11, 40(%r15) + .cfi_offset 29, -184 + ld %f13, 48(%r15) + .cfi_offset 30, -176 + ld %f15, 56(%r15) + .cfi_offset 31, -168 + lmg %r6, %r14, 112(%r15) + .cfi_offset 6, -112 + .cfi_offset 7, -104 + .cfi_offset 8, -96 + .cfi_offset 9, -88 + .cfi_offset 10, -80 + .cfi_offset 11, -72 + .cfi_offset 12, -64 + .cfi_offset 13, -56 + .cfi_offset 14, -48 + aghi %r15, 64 + .cfi_def_cfa_offset 160 + + .cfi_return_column 14 + br %r14 + .cfi_endproc + #elif defined(__arm__) && (defined(__ARM_EABI__) || defined(__APPLE__)) /************************************************************************************ * ARM ASM BITS From 51b6700139fce61eaa040f2a1a76d43237b4567c Mon Sep 17 00:00:00 2001 From: liushuyu Date: Wed, 31 Jul 2024 10:25:48 +0800 Subject: [PATCH 03/11] gen/abi: add initial ABI implementations for s390x --- gen/abi/abi.cpp | 2 ++ gen/abi/systemz.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++ gen/abi/targets.h | 2 ++ 3 files changed, 73 insertions(+) create mode 100644 gen/abi/systemz.cpp diff --git a/gen/abi/abi.cpp b/gen/abi/abi.cpp index d0bbf9bfb36..6d119212f01 100644 --- a/gen/abi/abi.cpp +++ b/gen/abi/abi.cpp @@ -281,6 +281,8 @@ TargetABI *TargetABI::getTarget() { case llvm::Triple::wasm32: case llvm::Triple::wasm64: return getWasmTargetABI(); + case llvm::Triple::systemz: + return getSystemZTargetABI(); default: warning(Loc(), "unknown target ABI, falling back to generic implementation. C/C++ " diff --git a/gen/abi/systemz.cpp b/gen/abi/systemz.cpp new file mode 100644 index 00000000000..666499aab03 --- /dev/null +++ b/gen/abi/systemz.cpp @@ -0,0 +1,69 @@ +//===-- abi-systemz.cpp +//-----------------------------------------------------===// +// +// LDC - the LLVM D compiler +// +// This file is distributed under the BSD-style LDC license. 
See the LICENSE +// file for details. +// +//===----------------------------------------------------------------------===// +// +// The ABI implementation used for 64 bit big-endian IBM Z targets. +// +// The IBM s390x ELF ABI can be found here: +// https://github.com/IBM/s390x-abi +//===----------------------------------------------------------------------===// + +#include "gen/abi/abi.h" +#include "gen/abi/generic.h" +#include "gen/dvalue.h" +#include "gen/irstate.h" +#include "gen/llvmhelpers.h" +#include "gen/tollvm.h" + +struct SystemZTargetABI : TargetABI { + IndirectByvalRewrite indirectByvalRewrite{}; + + explicit SystemZTargetABI() {} + + bool returnInArg(TypeFunction *tf, bool) override { + if (tf->isref()) { + return false; + } + Type *rt = tf->next->toBasetype(); + return DtoIsInMemoryOnly(rt); + } + + bool passByVal(TypeFunction *, Type *t) override { + return DtoIsInMemoryOnly(t); + } + + void rewriteFunctionType(IrFuncTy &fty) override { + if (!fty.ret->byref) { + rewriteArgument(fty, *fty.ret); + } + + for (auto arg : fty.args) { + if (!arg->byref) { + rewriteArgument(fty, *arg); + } + } + } + + void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override { + if (!isPOD(arg.type)) { + // non-PODs should be passed in memory + indirectByvalRewrite.applyTo(arg); + return; + } + Type *ty = arg.type->toBasetype(); + // integer types less than 64-bits should be extended to 64 bits + if (ty->isintegral()) { + arg.attrs.addAttribute(ty->isunsigned() ? 
LLAttribute::ZExt + : LLAttribute::SExt); + } + } +}; + +// The public getter for abi.cpp +TargetABI *getSystemZTargetABI() { return new SystemZTargetABI(); } diff --git a/gen/abi/targets.h b/gen/abi/targets.h index 49098fe2579..0825d0062ea 100644 --- a/gen/abi/targets.h +++ b/gen/abi/targets.h @@ -40,3 +40,5 @@ TargetABI *getX86TargetABI(); TargetABI *getLoongArch64TargetABI(); TargetABI *getWasmTargetABI(); + +TargetABI *getSystemZTargetABI(); From 5c03701073b38ab9ba02741ae79629f88268cb4d Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sun, 17 Nov 2024 13:58:24 -0700 Subject: [PATCH 04/11] gen/ir: add support for s390x special va_arg type --- gen/abi/systemz.cpp | 79 +++++++++++ ir/irstruct.cpp | 3 +- runtime/druntime/src/__importc_builtins.di | 4 + .../druntime/src/core/internal/vararg/s390x.d | 126 ++++++++++++++++++ runtime/druntime/src/core/stdc/stdarg.d | 13 ++ runtime/druntime/src/core/vararg.d | 5 + runtime/druntime/src/object.d | 8 ++ 7 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 runtime/druntime/src/core/internal/vararg/s390x.d diff --git a/gen/abi/systemz.cpp b/gen/abi/systemz.cpp index 666499aab03..45e94245f19 100644 --- a/gen/abi/systemz.cpp +++ b/gen/abi/systemz.cpp @@ -14,6 +14,8 @@ // https://github.com/IBM/s390x-abi //===----------------------------------------------------------------------===// +#include "dmd/identifier.h" +#include "dmd/nspace.h" #include "gen/abi/abi.h" #include "gen/abi/generic.h" #include "gen/dvalue.h" @@ -26,6 +28,20 @@ struct SystemZTargetABI : TargetABI { explicit SystemZTargetABI() {} + bool isSystemZVaList(Type *t) { + // look for a __va_list struct in a `std` C++ namespace + if (auto ts = t->isTypeStruct()) { + auto sd = ts->sym; + if (strcmp(sd->ident->toChars(), "__va_list") == 0) { + if (auto ns = sd->parent->isNspace()) { + return strcmp(ns->toChars(), "std") == 0; + } + } + } + + return false; + } + bool returnInArg(TypeFunction *tf, bool) override { if (tf->isref()) { return false; @@ -57,12 
+73,75 @@ struct SystemZTargetABI : TargetABI { return; } Type *ty = arg.type->toBasetype(); + // compiler magic: pass va_list args implicitly by reference + if (isSystemZVaList(ty)) { + arg.byref = true; + arg.ltype = arg.ltype->getPointerTo(); + return; + } // integer types less than 64-bits should be extended to 64 bits if (ty->isintegral()) { arg.attrs.addAttribute(ty->isunsigned() ? LLAttribute::ZExt : LLAttribute::SExt); } } + + Type *vaListType() override { + // We need to pass the actual va_list type for correct mangling. Simply + // using TypeIdentifier here is a bit wonky but works, as long as the name + // is actually available in the scope (this is what DMD does, so if a + // better solution is found there, this should be adapted). + return dmd::pointerTo( + TypeIdentifier::create(Loc(), Identifier::idPool("__va_list_tag"))); + } + + /** + * The SystemZ ABI (like AMD64) uses a special native va_list type - + * a 32-bytes struct passed by reference. + * In druntime, the struct is aliased as object.__va_list_tag; the actually + * used core.stdc.stdarg.va_list type is a __va_list_tag* pointer though to + * achieve byref semantics. This requires a little bit of compiler magic in + * the following implementations. + */ + + LLType *getValistType() { + LLType *longType = LLType::getInt64Ty(gIR->context()); + LLType *pointerType = getOpaquePtrType(); + + std::vector parts; // struct __va_list_tag { + parts.push_back(longType); // long __gpr; + parts.push_back(longType); // long __fpr; + parts.push_back(pointerType); // void *__overflow_arg_area; + parts.push_back(pointerType); // void *__reg_save_area; } + + return LLStructType::get(gIR->context(), parts); + } + + LLValue *prepareVaStart(DLValue *ap) override { + // Since the user only created a __va_list_tag* pointer (ap) on the stack + // before invoking va_start, we first need to allocate the actual + // __va_list_tag struct and set `ap` to its address. 
+ LLValue *valistmem = DtoRawAlloca(getValistType(), 0, "__va_list_mem"); + DtoStore(valistmem, DtoLVal(ap)); + // Pass an opaque pointer to the actual struct to LLVM's va_start intrinsic. + return valistmem; + } + + void vaCopy(DLValue *dest, DValue *src) override { + // Analog to va_start, we first need to allocate a new __va_list_tag struct + // on the stack and set `dest` to its address. + LLValue *valistmem = DtoRawAlloca(getValistType(), 0, "__va_list_mem"); + DtoStore(valistmem, DtoLVal(dest)); + // Then fill the new struct with a bitcopy of the source struct. + // `src` is a __va_list_tag* pointer to the source struct. + DtoMemCpy(getValistType(), valistmem, DtoRVal(src)); + } + + LLValue *prepareVaArg(DLValue *ap) override { + // Pass an opaque pointer to the actual __va_list_tag struct to LLVM's + // va_arg intrinsic. + return DtoRVal(ap); + } }; // The public getter for abi.cpp diff --git a/ir/irstruct.cpp b/ir/irstruct.cpp index 1b0f4003c0d..85b68f1b422 100644 --- a/ir/irstruct.cpp +++ b/ir/irstruct.cpp @@ -107,7 +107,8 @@ LLConstant *IrStruct::getTypeInfoInit() { const bool withArgTypes = (arch == llvm::Triple::x86_64 && !triple.isOSWindows()) || (!triple.isOSDarwin() && // Apple uses a simpler scheme - (arch == llvm::Triple::aarch64 || arch == llvm::Triple::aarch64_be)); + (arch == llvm::Triple::aarch64 || arch == llvm::Triple::aarch64_be)) || + (arch == llvm::Triple::systemz); const unsigned expectedFields = 11 + (withArgTypes ? 
2 : 0); const unsigned actualFields = structTypeInfoDecl->fields.length - diff --git a/runtime/druntime/src/__importc_builtins.di b/runtime/druntime/src/__importc_builtins.di index fc17bce4a22..d50c643c9d9 100644 --- a/runtime/druntime/src/__importc_builtins.di +++ b/runtime/druntime/src/__importc_builtins.di @@ -51,6 +51,10 @@ version (LDC) else version (AArch64) public import core.internal.vararg.aarch64 : __va_list; } + else version (SystemZ) + { + public import core.internal.vararg.s390x : __va_list; + } } else version (Posix) { diff --git a/runtime/druntime/src/core/internal/vararg/s390x.d b/runtime/druntime/src/core/internal/vararg/s390x.d new file mode 100644 index 00000000000..23d2cd9e337 --- /dev/null +++ b/runtime/druntime/src/core/internal/vararg/s390x.d @@ -0,0 +1,126 @@ +module core.internal.vararg.s390x; + +version (SystemZ) : import core.stdc.stdarg : alignUp; + +nothrow: + +// Layout of this struct must match __gnuc_va_list for C ABI compatibility +struct __va_list_tag +{ + long __gpr = 0; // no regs + long __fpr = 0; // no fp regs + void* __overflow_arg_area; + void* __reg_save_area; +} + +alias __va_list = __va_list_tag; + +/** + * Making it an array of 1 causes va_list to be passed as a pointer in + * function argument lists + */ +alias va_list = __va_list*; + +/// Compile-time `va_arg` extraction for s390x +T va_arg(T)(va_list ap) +{ + static if (is(T U == __argTypes)) + { + static if (U.length == 0 || U[0].sizeof > 8 || is(T1 == __vector)) + { + // Always passed in memory (varying vectors are passed in parameter area) + auto p = *cast(T*) ap.__overflow_arg_area; + ap.__overflow_arg_area = p + T.alignof.alignUp; + return p; + } + else static if (U.length == 1) + { + // Arg is passed in one register + alias T1 = U[0]; + static if (is(T1 == double) || is(T1 == float)) + { + // Maybe passed in $fr registers + if (ap.__fpr <= 4) + { + // Passed in $fr registers (FPR region starts at +0x80) + auto p = cast(T*) ap.__reg_save_area + 128 + ap.__fpr * 8; 
+ ap.__fpr++; + return p; + } + else + { + // overflow arguments + auto p = cast(T*) ap.__overflow_arg_area; + // no matter the actual size of the fp variable + // parameter slot is always 8-byte-wide (f32 is extended to f64) + ap.__overflow_arg_area += 8; + return p; + } + } + else + { + // Maybe passed in $r (GPR) registers + if (ap.__gpr <= 5) + { + // Passed in $gpr registers (GPR region starts at +0x10) + auto p = cast(T*) ap.__reg_save_area + 16 + ap.__gpr * 8; + ap.__gpr++; + return p; + } + else + { + // overflow arguments + auto p = cast(T*) ap.__overflow_arg_area; + // no matter the actual size of the gpr variable + // parameter slot is always 8-byte-wide (after ABI adjustments) + ap.__overflow_arg_area += 8; + return p; + } + } + } + else + { + static assert(false); + } + } + else + { + static assert(false, "not a valid argument type for va_arg"); + } +} + +/// Runtime `va_arg` extraction for s390x +void va_arg()(va_list ap, TypeInfo ti, void* parmn) +{ + TypeInfo arg1, arg2; + if (!ti.argTypes(arg1, arg2)) + { + TypeInfo_Vector v1 = arg1 ? 
cast(TypeInfo_Vector) arg1 : null; + if (arg1 && (arg1.tsize <= 8 && !v1)) + { + auto tsize = arg1.tsize; + // Maybe passed in $r (GPR) registers + if (ap.__gpr <= 5) + { + // Passed in $gpr registers (GPR region starts at +0x10) + auto p = cast(T*) ap.__reg_save_area + 16 + ap.__gpr * 8; + ap.__gpr++; + parmn[0..tsize] = p[0..tsize]; + } + else + { + // overflow arguments + auto p = cast(T*) ap.__overflow_arg_area; + // no matter the actual size of the gpr variable + // parameter slot is always 8-byte-wide (after ABI adjustments) + ap.__overflow_arg_area += 8; + parmn[0..tsize] = p[0..tsize]; + } + } + assert(!arg2); + } + else + { + assert(false, "not a valid argument type for va_arg"); + } +} diff --git a/runtime/druntime/src/core/stdc/stdarg.d b/runtime/druntime/src/core/stdc/stdarg.d index 0ba1ebe34e3..00a8bdef114 100644 --- a/runtime/druntime/src/core/stdc/stdarg.d +++ b/runtime/druntime/src/core/stdc/stdarg.d @@ -72,6 +72,10 @@ else version (ARM_Any) static import core.internal.vararg.aarch64; } } +else version (SystemZ) +{ + static import core.internal.vararg.s390x; +} T alignUp(size_t alignment = size_t.sizeof, T)(T base) pure @@ -137,6 +141,11 @@ else version (RISCV_Any) // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc alias va_list = void*; } +else version (SystemZ) +{ + alias va_list = core.internal.vararg.s390x.va_list; + public import core.internal.vararg.s390x : __va_list, __va_list_tag; +} else { alias va_list = char*; // incl. 
unknown platforms @@ -285,6 +294,10 @@ T va_arg(T)(ref va_list ap) ap += T.sizeof.alignUp; return *p; } + else version (SystemZ) + { + return core.internal.vararg.s390x.va_arg!T(ap); + } else static assert(0, "Unsupported platform"); } diff --git a/runtime/druntime/src/core/vararg.d b/runtime/druntime/src/core/vararg.d index e6dd47d06d3..09b6d62aa0f 100644 --- a/runtime/druntime/src/core/vararg.d +++ b/runtime/druntime/src/core/vararg.d @@ -118,6 +118,11 @@ void va_arg()(ref va_list ap, TypeInfo ti, void* parmn) ap += tsize.alignUp; parmn[0..tsize] = p[0..tsize]; } + else version (SystemZ) + { + static import core.internal.vararg.s390x; + core.internal.vararg.s390x.va_arg(ap, ti, parmn); + } else version (PPC_Any) { if (ti.talign >= 8) diff --git a/runtime/druntime/src/object.d b/runtime/druntime/src/object.d index 582467ca300..6c75d91ff5e 100644 --- a/runtime/druntime/src/object.d +++ b/runtime/druntime/src/object.d @@ -103,6 +103,10 @@ version (LDC) // note: there's a copy for importC in __importc_builtins.di else version (AArch64) public import core.internal.vararg.aarch64 : __va_list; } + else version (SystemZ) + { + public import core.internal.vararg.s390x : __va_list; + } } version (D_ObjectiveC) @@ -132,6 +136,10 @@ else version (AArch64) else version (WatchOS) {} else version = WithArgTypes; } +else version (SystemZ) +{ + version = WithArgTypes; +} /** * All D class objects inherit from Object. 
From bb76c846e9f3dc310ddb4c5b816b5e1d26bc23dc Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sun, 17 Nov 2024 14:00:55 -0700 Subject: [PATCH 05/11] osthread.d: add callWithStackShell fallback --- runtime/druntime/src/core/thread/osthread.d | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/runtime/druntime/src/core/thread/osthread.d b/runtime/druntime/src/core/thread/osthread.d index ba8872ffc52..bc72b71d8ba 100644 --- a/runtime/druntime/src/core/thread/osthread.d +++ b/runtime/druntime/src/core/thread/osthread.d @@ -44,6 +44,9 @@ version (LDC) { import ldc.sanitizers_optionally_linked; } + + pragma(LDC_intrinsic, "llvm.eh.unwind.init") + void llvm_unwind_init(); } @@ -1654,7 +1657,8 @@ in (fn) } else { - static assert(false, "Architecture not supported."); + llvm_unwind_init(); + sp = &sp; } } else From e1ac2e27dc84f8afb586d590c40d8cd73486bda1 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 31 Dec 2024 20:38:43 -0700 Subject: [PATCH 06/11] gen/abi: add more s390x ABI rewrites --- gen/abi/systemz.cpp | 59 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/gen/abi/systemz.cpp b/gen/abi/systemz.cpp index 45e94245f19..f911dd298d6 100644 --- a/gen/abi/systemz.cpp +++ b/gen/abi/systemz.cpp @@ -23,8 +23,30 @@ #include "gen/llvmhelpers.h" #include "gen/tollvm.h" +using namespace dmd; + +struct StructSimpleFlattenRewrite : BaseBitcastABIRewrite { + LLType *type(Type *ty) override { + const size_t type_size = size(ty); + // "A struct or a union of 1, 2, 4, or 8 bytes" + switch (type_size) { + case 1: + return LLType::getInt8Ty(gIR->context()); + case 2: + return LLType::getInt16Ty(gIR->context()); + case 4: + return LLType::getInt32Ty(gIR->context()); + case 8: + return LLType::getInt64Ty(gIR->context()); + default: + return DtoType(ty); + } + } +}; + struct SystemZTargetABI : TargetABI { IndirectByvalRewrite indirectByvalRewrite{}; + StructSimpleFlattenRewrite structSimpleFlattenRewrite{}; 
explicit SystemZTargetABI() {} @@ -32,7 +54,7 @@ struct SystemZTargetABI : TargetABI { // look for a __va_list struct in a `std` C++ namespace if (auto ts = t->isTypeStruct()) { auto sd = ts->sym; - if (strcmp(sd->ident->toChars(), "__va_list") == 0) { + if (strcmp(sd->ident->toChars(), "__va_list_tag") == 0) { if (auto ns = sd->parent->isNspace()) { return strcmp(ns->toChars(), "std") == 0; } @@ -47,10 +69,33 @@ struct SystemZTargetABI : TargetABI { return false; } Type *rt = tf->next->toBasetype(); - return DtoIsInMemoryOnly(rt); + if (rt->ty == TY::Tstruct) { + return true; + } + if (rt->isTypeVector() && size(rt) > 16) { + return true; + } + return shouldPassByVal(tf->next); } bool passByVal(TypeFunction *, Type *t) override { + // LLVM's byval attribute is not compatible with the SystemZ ABI + // due to how SystemZ's stack is setup + return false; + } + + bool shouldPassByVal(Type *t) { + if (t->ty == TY::Tstruct && size(t) <= 8) { + return false; + } + // "A struct or union of any other size, a complex type, an __int128, a long + // double, a _Decimal128, or a vector whose size exceeds 16 bytes" + if (size(t) > 16 || t->iscomplex() || t->isimaginary()) { + return true; + } + if (t->ty == TY::Tint128 || t->ty == TY::Tcomplex80) { + return true; + } return DtoIsInMemoryOnly(t); } @@ -67,7 +112,7 @@ struct SystemZTargetABI : TargetABI { } void rewriteArgument(IrFuncTy &fty, IrFuncTyArg &arg) override { - if (!isPOD(arg.type)) { + if (!isPOD(arg.type) || shouldPassByVal(arg.type)) { // non-PODs should be passed in memory indirectByvalRewrite.applyTo(arg); return; @@ -80,10 +125,16 @@ struct SystemZTargetABI : TargetABI { return; } // integer types less than 64-bits should be extended to 64 bits - if (ty->isintegral()) { + if (ty->isintegral() && + !(ty->ty == TY::Tstruct || ty->ty == TY::Tsarray || + ty->ty == TY::Tvector) && + size(ty) < 8) { arg.attrs.addAttribute(ty->isunsigned() ? 
LLAttribute::ZExt : LLAttribute::SExt); } + if (ty->isTypeStruct() && size(ty) <= 8) { + structSimpleFlattenRewrite.applyToIfNotObsolete(arg); + } } Type *vaListType() override { From 23e836bbf49e88fede8b7a809d302956264a9367 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 31 Dec 2024 20:40:51 -0700 Subject: [PATCH 07/11] druntime/thread: add osthread support for s390x --- runtime/druntime/src/core/thread/osthread.d | 29 +++++++++------------ 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/runtime/druntime/src/core/thread/osthread.d b/runtime/druntime/src/core/thread/osthread.d index bc72b71d8ba..26236151740 100644 --- a/runtime/druntime/src/core/thread/osthread.d +++ b/runtime/druntime/src/core/thread/osthread.d @@ -46,7 +46,7 @@ version (LDC) } pragma(LDC_intrinsic, "llvm.eh.unwind.init") - void llvm_unwind_init(); + void llvm_unwind_init() nothrow @nogc; } @@ -1571,8 +1571,8 @@ in (fn) }} asm pure nothrow @nogc { ("sd $gp, %0") : "=m" (regs[8]); - ("sd $fp, %0") : "=m" (regs[9]); - ("sd $ra, %0") : "=m" (sp); + ("sd $fp, %0") : "=m" (regs[9]); + ("sd $sp, %0") : "=m" (sp); } } else version (MIPS_Any) @@ -1638,22 +1638,14 @@ in (fn) } else version (SystemZ) { - size_t[19] regs = void; + // Callee-save registers, according to SystemZ Calling Convention + // https://github.com/IBM/s390x-abi/blob/main/lzsabi.tex + size_t[10] regs = void; asm pure nothrow @nogc { - // save argument/return register - "stg %%r2, %0" : "=m" (regs[0]); - // save callee-saved GPRs (%r6 - %r14) - "stmg %%r6, %%r14, %0" : "=m" (regs[1]); - // save floating point control register - "stfpc %0" : "=m" (regs[10]); + // save callee-saved GPRs (%r6 - %r15) + "stmg %%r6, %%r15, %0" : "=m" (regs[0]); } - static foreach (i; 8 .. 
16) - {{ - enum int j = i; - // save %f8 - %f15 - asm pure nothrow @nogc { ( "std %%f"~j.stringof~", %0") : "=m" (regs[i + 3]); } - }} - asm pure nothrow @nogc { ( "stg %%r15, %0") : "=m" (sp); } + sp = cast(void*)regs[9]; } else { @@ -1723,6 +1715,7 @@ version (LDC) version (ARM_Any) version = LDC_stackTopAsm; version (PPC_Any) version = LDC_stackTopAsm; version (MIPS_Any) version = LDC_stackTopAsm; + version (SystemZ) version = LDC_stackTopAsm; version (LDC_stackTopAsm) { @@ -1742,6 +1735,8 @@ version (LDC) return __asm!(void*)("mr $0, 1", "=r"); else version (MIPS_Any) return __asm!(void*)("move $0, $$sp", "=r"); + else version (SystemZ) + return __asm!(void*)("lgr $0, %r15", "=r"); else static assert(0); } From 81efb4465458226f78deeb010e2c63c7a1a628bf Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 24 Dec 2024 22:01:16 -0700 Subject: [PATCH 08/11] gen/ctfloat: make CTFloat big-endian aware --- gen/ctfloat.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/gen/ctfloat.cpp b/gen/ctfloat.cpp index ef033c018df..88fa314fe57 100644 --- a/gen/ctfloat.cpp +++ b/gen/ctfloat.cpp @@ -82,6 +82,12 @@ void CTFloat::toAPFloat(const real_t src, APFloat &dst) { CTFloatUnion u; u.fp = src; +#ifdef __FLOAT_WORD_ORDER +#if __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ + std::swap(u.bits[0], u.bits[1]); +#endif // __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ +#endif // __FLOAT_WORD_ORDER + const unsigned sizeInBits = APFloat::getSizeInBits(*apSemantics); const APInt bits = APInt(sizeInBits, numUint64Parts, u.bits); @@ -97,11 +103,20 @@ real_t CTFloat::fromAPFloat(const APFloat &src_) { src.convert(*apSemantics, APFloat::rmNearestTiesToEven, &ignored); } +#if LDC_LLVM_VER >= 2001 && defined(HAS_IEE754_FLOAT128) + return src.convertToQuad(); +#else const APInt bits = src.bitcastToAPInt(); - - CTFloatUnion u; - memcpy(u.bits, bits.getRawData(), bits.getBitWidth() / 8); + CTFloatUnion u{}; + memcpy(u.bits, bits.getRawData(), + 
std::min(static_cast<size_t>(bits.getNumWords()) * 8, sizeof(u.bits))); +#ifdef __FLOAT_WORD_ORDER +#if __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ + std::swap(u.bits[0], u.bits[1]); +#endif // __FLOAT_WORD_ORDER == __ORDER_BIG_ENDIAN__ +#endif // __FLOAT_WORD_ORDER return u.fp; +#endif } //////////////////////////////////////////////////////////////////////////////// From 1f966ebbd6ffe62c954780872a9bb1f4c913e793 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sat, 18 Jan 2025 15:05:37 -0700 Subject: [PATCH 09/11] gen/abi: flatten single float struct to float on s390x --- gen/abi/systemz.cpp | 48 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/gen/abi/systemz.cpp b/gen/abi/systemz.cpp index f911dd298d6..c1a7d72560f 100644 --- a/gen/abi/systemz.cpp +++ b/gen/abi/systemz.cpp @@ -25,6 +25,45 @@ using namespace dmd; +struct SimpleHardfloatRewrite : ABIRewrite { + Type *getFirstFieldType(Type *ty) { + if (auto ts = ty->toBasetype()->isTypeStruct()) { + assert(ts->sym->fields.size() == 1); + auto *subField = ts->sym->fields[0]; + if (subField->type->isfloating()) { + return subField->type; + } + return nullptr; + } + return nullptr; + } + + LLValue *put(DValue *dv, bool, bool) override { + const auto flat = getFirstFieldType(dv->type); + LLType *asType = DtoType(flat); + assert(dv->isLVal()); + LLValue *flatGEP = DtoGEP1(asType, DtoLVal(dv), 0U); + LLValue *flatValue = DtoLoad(asType, flatGEP, ".HardfloatRewrite_arg"); + return flatValue; + } + + LLValue *getLVal(Type *dty, LLValue *v) override { + // inverse operation of method "put" + LLValue *insertedValue = DtoInsertValue(llvm::UndefValue::get(DtoType(dty)), v, 0); + return DtoAllocaDump(insertedValue, dty, ".HardfloatRewrite_param_storage"); + } + + LLType *type(Type *ty) override { return DtoType(getFirstFieldType(ty)); } + + bool shouldApplyRewrite(Type *ty) { + if (auto ts = ty->toBasetype()->isTypeStruct()) { + return ts->sym->fields.size() == 1 && + 
ts->sym->fields[0]->type->isfloating(); + } + return false; + } +}; + struct StructSimpleFlattenRewrite : BaseBitcastABIRewrite { LLType *type(Type *ty) override { const size_t type_size = size(ty); @@ -47,6 +86,7 @@ struct StructSimpleFlattenRewrite : BaseBitcastABIRewrite { struct SystemZTargetABI : TargetABI { IndirectByvalRewrite indirectByvalRewrite{}; StructSimpleFlattenRewrite structSimpleFlattenRewrite{}; + SimpleHardfloatRewrite simpleHardfloatRewrite{}; explicit SystemZTargetABI() {} @@ -132,8 +172,12 @@ struct SystemZTargetABI : TargetABI { arg.attrs.addAttribute(ty->isunsigned() ? LLAttribute::ZExt : LLAttribute::SExt); } - if (ty->isTypeStruct() && size(ty) <= 8) { - structSimpleFlattenRewrite.applyToIfNotObsolete(arg); + if (ty->isTypeStruct()) { + if (simpleHardfloatRewrite.shouldApplyRewrite(ty)) { + simpleHardfloatRewrite.applyTo(arg); + } else if (size(ty) <= 8) { + structSimpleFlattenRewrite.applyToIfNotObsolete(arg); + } } } From 84dfbf51bc981d231890faea0bd3cdf366f5b58b Mon Sep 17 00:00:00 2001 From: liushuyu Date: Sun, 19 Jan 2025 00:01:06 -0700 Subject: [PATCH 10/11] dmd: more s390x va_arg implementations --- dmd/argtypes.h | 2 + dmd/argtypes_s390x.d | 74 +++++++++++++++++++ dmd/cxxfrontend.d | 9 +++ gen/target.cpp | 2 + .../druntime/src/core/internal/vararg/s390x.d | 51 +++++++++++-- 5 files changed, 133 insertions(+), 5 deletions(-) create mode 100644 dmd/argtypes_s390x.d diff --git a/dmd/argtypes.h b/dmd/argtypes.h index 8d018ea324b..d2e0800a57d 100644 --- a/dmd/argtypes.h +++ b/dmd/argtypes.h @@ -21,5 +21,7 @@ namespace dmd TypeTuple *toArgTypes_sysv_x64(Type *t); // in argtypes_aarch64.d TypeTuple *toArgTypes_aarch64(Type *t); + // in argtypes_s390x.d + TypeTuple *toArgTypes_s390x(Type *t); bool isHFVA(Type *t, int maxNumElements = 4, Type **rewriteType = nullptr); } diff --git a/dmd/argtypes_s390x.d b/dmd/argtypes_s390x.d new file mode 100644 index 00000000000..03a4b1fe451 --- /dev/null +++ b/dmd/argtypes_s390x.d @@ -0,0 +1,74 @@ +/** + 
* Break down a D type into basic (register) types for the IBM Z ELF ABI. + * + * Copyright: Copyright (C) 2024-2025 by The D Language Foundation, All Rights Reserved + * Authors: Martin Kinkelin + * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/argtypes_s390x.d, _argtypes_s390x.d) + * Documentation: https://dlang.org/phobos/dmd_argtypes_s390x.html + * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/argtypes_s390x.d + */ + +module dmd.argtypes_s390x; + +import dmd.astenums; +import dmd.mtype; +import dmd.typesem; + +/**************************************************** + * This breaks a type down into 'simpler' types that can be passed to a function + * in registers, and returned in registers. + * This is the implementation for the IBM Z ELF ABI, + * based on https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf. + * Params: + * t = type to break down + * Returns: + * tuple of types, each element can be passed in a register. + * A tuple of zero length means the type cannot be passed/returned in registers. + * null indicates a `void`. + */ +TypeTuple toArgTypes_s390x(Type t) +{ + if (t == Type.terror) + return new TypeTuple(t); + + const size = cast(size_t) t.size(); + if (size == 0) + return null; + + // TODO + // Implement the rest of the va args passing + //... 
+ Type tb = t.toBasetype(); + const isAggregate = tb.ty == Tstruct || tb.ty == Tsarray || tb.ty == Tarray || tb.ty == Tdelegate || tb.iscomplex(); + if (!isAggregate) + return new TypeTuple(t); + // unwrap single-float struct per ABI requirements + if (auto tstruct = t.isTypeStruct()) + { + if (tstruct.sym.fields.length == 1) + { + Type fieldType = tstruct.sym.fields[0].type.toBasetype(); + if (fieldType.isfloating()) + { + return new TypeTuple(fieldType); + } + } + } + + // pass remaining aggregates in 1 or 2 GP registers + static Type getGPType(size_t size) + { + switch (size) + { + case 1: return Type.tint8; + case 2: return Type.tint16; + case 4: return Type.tint32; + case 8: return Type.tint64; + default: + import dmd.typesem : sarrayOf; + return Type.tint64.sarrayOf((size + 7) / 8); + } + } + return new TypeTuple(getGPType(size)); +} \ No newline at end of file diff --git a/dmd/cxxfrontend.d b/dmd/cxxfrontend.d index 47a443e3787..f4a6528e82e 100644 --- a/dmd/cxxfrontend.d +++ b/dmd/cxxfrontend.d @@ -698,4 +698,13 @@ version (IN_LLVM) import dmd.argtypes_x86; return dmd.argtypes_x86.toArgTypes_x86(t); } + + /*********************************************************** + * argtypes_s390x.d + */ + TypeTuple toArgTypes_s390x(Type t) + { + import dmd.argtypes_s390x; + return dmd.argtypes_s390x.toArgTypes_s390x(t); + } } diff --git a/gen/target.cpp b/gen/target.cpp index 0cab94a215c..ebea59c1ff7 100644 --- a/gen/target.cpp +++ b/gen/target.cpp @@ -344,6 +344,8 @@ TypeTuple *Target::toArgTypes(Type *t) { return toArgTypes_sysv_x64(t); if (arch == llvm::Triple::aarch64 || arch == llvm::Triple::aarch64_be) return toArgTypes_aarch64(t); + if (arch == llvm::Triple::systemz) + return toArgTypes_s390x(t); return nullptr; } diff --git a/runtime/druntime/src/core/internal/vararg/s390x.d b/runtime/druntime/src/core/internal/vararg/s390x.d index 23d2cd9e337..ad02a94379d 100644 --- a/runtime/druntime/src/core/internal/vararg/s390x.d +++ 
b/runtime/druntime/src/core/internal/vararg/s390x.d @@ -26,7 +26,7 @@ T va_arg(T)(va_list ap) { static if (is(T U == __argTypes)) { - static if (U.length == 0 || U[0].sizeof > 8 || is(T1 == __vector)) + static if (U.length == 0 || U[0].sizeof > 8 || is(U[0] == __vector)) { // Always passed in memory (varying vectors are passed in parameter area) auto p = *cast(T*) ap.__overflow_arg_area; @@ -45,7 +45,7 @@ T va_arg(T)(va_list ap) // Passed in $fr registers (FPR region starts at +0x80) auto p = cast(T*) ap.__reg_save_area + 128 + ap.__fpr * 8; ap.__fpr++; - return p; + return *p; } else { @@ -54,7 +54,7 @@ T va_arg(T)(va_list ap) // no matter the actual size of the fp variable // parameter slot is always 8-byte-wide (f32 is extended to f64) ap.__overflow_arg_area += 8; - return p; + return *p; } } else @@ -65,7 +65,7 @@ T va_arg(T)(va_list ap) // Passed in $gpr registers (GPR region starts at +0x10) auto p = cast(T*) ap.__reg_save_area + 16 + ap.__gpr * 8; ap.__gpr++; - return p; + return *p; } else { @@ -74,7 +74,7 @@ T va_arg(T)(va_list ap) // no matter the actual size of the gpr variable // parameter slot is always 8-byte-wide (after ABI adjustments) ap.__overflow_arg_area += 8; - return p; + return *p; } } } @@ -93,6 +93,23 @@ T va_arg(T)(va_list ap) void va_arg()(va_list ap, TypeInfo ti, void* parmn) { TypeInfo arg1, arg2; + if (TypeInfo_Struct ti_struct = cast(TypeInfo_Struct) ti) + { + // handle single-float element struct + const rtFields = ti_struct.offTi(); + if (rtFields && rtFields.length == 1) + { + TypeInfo field1TypeInfo = rtFields[0].ti; + if (field1TypeInfo is typeid(float) || field1TypeInfo is typeid(double)) + { + auto tsize = field1TypeInfo.tsize; + auto toffset = rtFields[0].offset; + parmn[0..tsize] = p[toffset..tsize]; + return; + } + } + } + if (!ti.argTypes(arg1, arg2)) { TypeInfo_Vector v1 = arg1 ? 
cast(TypeInfo_Vector) arg1 : null; @@ -117,6 +134,30 @@ void va_arg()(va_list ap, TypeInfo ti, void* parmn) parmn[0..tsize] = p[0..tsize]; } } + else if (arg1 && (arg1 is typeid(float) || arg1 is typeid(double))) + { + // Maybe passed in $fr registers + if (ap.__fpr <= 4) + { + // Passed in $fr registers (FPR region starts at +0x80) + auto p = cast(T*) ap.__reg_save_area + 128 + ap.__fpr * 8; + ap.__fpr++; + parmn[0..tsize] = p[0..tsize]; + } + else + { + // overflow arguments + auto p = cast(T*) ap.__overflow_arg_area; + // no matter the actual size of the fp variable + // parameter slot is always 8-byte-wide (f32 is extended to f64) + ap.__overflow_arg_area += 8; + parmn[0..tsize] = p[0..tsize]; + } + } + else + { + assert(false, "unhandled va_arg type!"); + } assert(!arg2); } else From e53e8588f804137d2d805d16019c54d245a067c8 Mon Sep 17 00:00:00 2001 From: liushuyu Date: Tue, 28 Jan 2025 12:21:27 -0700 Subject: [PATCH 11/11] WIP threadasm.S --- runtime/druntime/src/core/threadasm.S | 49 ++++++++++++++------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/runtime/druntime/src/core/threadasm.S b/runtime/druntime/src/core/threadasm.S index 08675e14650..90bd7f3e9d0 100644 --- a/runtime/druntime/src/core/threadasm.S +++ b/runtime/druntime/src/core/threadasm.S @@ -744,58 +744,61 @@ fiber_switchContext: .cfi_offset 14, -48 .cfi_offset 15, -40 lgr %r1, %r15 - aghi %r15, -64 + aghi %r15, -224 /* we need to tell the debugger that the current stack offset is: - 64 (frame size) + 160 (parameter area size) */ - .cfi_def_cfa_offset 224 + 64 (frame size, for storing f8 ~ f15) + 160 (temp parameter area size) + 160 (last parameter area size) */ + .cfi_def_cfa_offset 384 /* store the (optional) backchain data */ - stg %r1, 0(%r15) + stg %r1, 64(%r15) /* Save callee-saved floating point registers s390x ABI has a very unique way for storing fp registers: even-pairs first and odd-pairs last */ std %f8, 0(%r15) - .cfi_offset 24, -224 + .cfi_offset 24, -384 std 
%f10, 8(%r15) - .cfi_offset 25, -216 + .cfi_offset 25, -376 std %f12, 16(%r15) - .cfi_offset 26, -208 + .cfi_offset 26, -368 std %f14, 24(%r15) - .cfi_offset 27, -200 + .cfi_offset 27, -360 std %f9, 32(%r15) - .cfi_offset 28, -192 + .cfi_offset 28, -352 std %f11, 40(%r15) - .cfi_offset 29, -184 + .cfi_offset 29, -344 std %f13, 48(%r15) - .cfi_offset 30, -176 + .cfi_offset 30, -336 std %f15, 56(%r15) - .cfi_offset 31, -168 + .cfi_offset 31, -328 /* Save stack pointer, the stack pointer is adjusted so that GC won't see the float point registers */ - stg %r15, 0(%r2) + la %r1, 64(%r15) + stg %r1, 0(%r2) /* Load the new context pointer as stack pointer. */ lgr %r15, %r3 - .cfi_def_cfa_offset 224 + .cfi_def_cfa_offset 320 + aghi %r15, -64 + .cfi_def_cfa_offset 384 /* Restore call-saved floating point registers. */ ld %f8, 0(%r15) - .cfi_offset 24, -224 + .cfi_offset 24, -384 ld %f10, 8(%r15) - .cfi_offset 25, -216 + .cfi_offset 25, -376 ld %f12, 16(%r15) - .cfi_offset 26, -208 + .cfi_offset 26, -368 ld %f14, 24(%r15) - .cfi_offset 27, -200 + .cfi_offset 27, -360 ld %f9, 32(%r15) - .cfi_offset 28, -192 + .cfi_offset 28, -352 ld %f11, 40(%r15) - .cfi_offset 29, -184 + .cfi_offset 29, -344 ld %f13, 48(%r15) - .cfi_offset 30, -176 + .cfi_offset 30, -336 ld %f15, 56(%r15) - .cfi_offset 31, -168 - lmg %r6, %r14, 112(%r15) + .cfi_offset 31, -328 + lmg %r6, %r14, 272(%r15) .cfi_offset 6, -112 .cfi_offset 7, -104 .cfi_offset 8, -96