From 1e7bae65386cc3f4dd5b6ee5e3495c3dfc5c769a Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Fri, 25 Apr 2025 14:04:14 +0200 Subject: [PATCH 1/6] Add EVMVersion::current() helper --- liblangutil/EVMVersion.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/liblangutil/EVMVersion.h b/liblangutil/EVMVersion.h index a159f2e4bb2f..cec638ed38d3 100644 --- a/liblangutil/EVMVersion.h +++ b/liblangutil/EVMVersion.h @@ -28,8 +28,6 @@ #include #include -#include - namespace solidity::evmasm { @@ -44,13 +42,13 @@ namespace solidity::langutil * A version specifier of the EVM we want to compile to. * Defaults to the latest version deployed on Ethereum Mainnet at the time of compiler release. */ -class EVMVersion: - boost::less_than_comparable, - boost::equality_comparable +class EVMVersion { public: EVMVersion() = default; + static EVMVersion current() { return {currentVersion}; } + static EVMVersion homestead() { return {Version::Homestead}; } static EVMVersion tangerineWhistle() { return {Version::TangerineWhistle}; } static EVMVersion spuriousDragon() { return {Version::SpuriousDragon}; } @@ -96,11 +94,10 @@ class EVMVersion: static EVMVersion firstWithEOF() { return {Version::Osaka}; } bool isExperimental() const { - return *this > EVMVersion{}; + return m_version > currentVersion; } - bool operator==(EVMVersion const& _other) const { return m_version == _other.m_version; } - bool operator<(EVMVersion const& _other) const { return m_version < _other.m_version; } + auto operator<=>(EVMVersion const&) const = default; std::string name() const { @@ -164,10 +161,11 @@ class EVMVersion: Prague, Osaka, }; + static auto constexpr currentVersion = Version::Cancun; EVMVersion(Version _version): m_version(_version) {} - Version m_version = Version::Cancun; + Version m_version = currentVersion; }; } From ba175b6912986bac111d647a5bb61221e1e6eb7c Mon Sep 17 00:00:00 2001 From: clonker 
<1685266+clonker@users.noreply.github.com> Date: Thu, 20 Mar 2025 09:12:05 +0100 Subject: [PATCH 2/6] First define _all_ EVM functions, then declare them based on EVM version and configuration --- libyul/backends/evm/EVMDialect.cpp | 611 ++++++++++++++--------- libyul/backends/evm/EVMDialect.h | 2 +- libyul/backends/evm/NoOutputAssembly.cpp | 41 +- 3 files changed, 404 insertions(+), 250 deletions(-) diff --git a/libyul/backends/evm/EVMDialect.cpp b/libyul/backends/evm/EVMDialect.cpp index d68ef16348b1..87cd876ae254 100644 --- a/libyul/backends/evm/EVMDialect.cpp +++ b/libyul/backends/evm/EVMDialect.cpp @@ -21,17 +21,21 @@ #include +#include #include #include -#include + #include #include #include #include #include -#include +#include + #include +#include +#include #include #include @@ -56,55 +60,32 @@ std::tuple constexpr verbatimIndexToArgsAndRets(size_t _index) return std::make_tuple(_index - numRets * EVMDialect::verbatimMaxInputSlots, numRets); } -BuiltinFunctionForEVM createEVMFunction( - langutil::EVMVersion _evmVersion, - std::string const& _name, - evmasm::Instruction _instruction -) +bool isLowLevelStackManipulationInstruction(evmasm::Instruction const& _instruction) { - BuiltinFunctionForEVM f; - evmasm::InstructionInfo info = evmasm::instructionInfo(_instruction, _evmVersion); - f.name = _name; - f.numParameters = static_cast(info.args); - f.numReturns = static_cast(info.ret); - f.sideEffects = EVMDialect::sideEffectsOfInstruction(_instruction); - f.controlFlowSideEffects = ControlFlowSideEffects::fromInstruction(_instruction); - f.isMSize = _instruction == evmasm::Instruction::MSIZE; - f.literalArguments.clear(); - f.instruction = _instruction; - f.generateCode = [_instruction]( - FunctionCall const&, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - _assembly.appendInstruction(_instruction); - }; - return f; + return + _instruction == evmasm::Instruction::SWAPN || + evmasm::SemanticInformation::isSwapInstruction(_instruction) || + _instruction 
== evmasm::Instruction::DUPN || + evmasm::SemanticInformation::isDupInstruction(_instruction) || + isPushInstruction(_instruction); } -BuiltinFunctionForEVM createFunction( - std::string const& _name, - size_t _params, - size_t _returns, - SideEffects _sideEffects, - ControlFlowSideEffects _controlFlowSideEffects, - std::vector> _literalArguments, - std::function _generateCode -) +bool isLowLevelControlFlowInstruction(evmasm::Instruction const& _instruction) { - yulAssert(_literalArguments.size() == _params || _literalArguments.empty(), ""); - - BuiltinFunctionForEVM f; - f.name = _name; - f.numParameters = _params; - f.numReturns = _returns; - f.sideEffects = _sideEffects; - f.controlFlowSideEffects = _controlFlowSideEffects; - f.literalArguments = std::move(_literalArguments); - f.isMSize = false; - f.instruction = {}; - f.generateCode = std::move(_generateCode); - return f; + switch (_instruction) + { + case evmasm::Instruction::JUMP: + case evmasm::Instruction::JUMPI: + case evmasm::Instruction::JUMPDEST: + case evmasm::Instruction::JUMPF: + case evmasm::Instruction::RJUMP: + case evmasm::Instruction::RJUMPI: + case evmasm::Instruction::RETF: + case evmasm::Instruction::CALLF: + return true; + default: + return false; + } } std::set> createReservedIdentifiers(langutil::EVMVersion _evmVersion, std::optional _eofVersion) @@ -195,47 +176,7 @@ std::set> createReservedIdentifiers(langutil::EVMVersio return reserved; } -std::vector> createBuiltins(langutil::EVMVersion _evmVersion, std::optional _eofVersion, bool _objectAccess) -{ - - // Exclude prevrandao as builtin for VMs before paris and difficulty for VMs after paris. 
- auto prevRandaoException = [&](std::string const& _instrName) -> bool - { - return (_instrName == "prevrandao" && _evmVersion < langutil::EVMVersion::paris()) || (_instrName == "difficulty" && _evmVersion >= langutil::EVMVersion::paris()); - }; - - std::vector> builtins; - for (auto const& instr: evmasm::c_instructions) - { - std::string name = toLower(instr.first); - auto const opcode = instr.second; - - if ( - !(opcode >= evmasm::Instruction::DUP1 && opcode <= evmasm::Instruction::DUP16) && - !(opcode >= evmasm::Instruction::SWAP1 && opcode <= evmasm::Instruction::SWAP16) && - !evmasm::isPushInstruction(opcode) && - opcode != evmasm::Instruction::JUMP && - opcode != evmasm::Instruction::JUMPI && - opcode != evmasm::Instruction::JUMPDEST && - opcode != evmasm::Instruction::DATALOADN && - opcode != evmasm::Instruction::EOFCREATE && - opcode != evmasm::Instruction::RETURNCONTRACT && - opcode != evmasm::Instruction::RJUMP && - opcode != evmasm::Instruction::RJUMPI && - opcode != evmasm::Instruction::CALLF && - opcode != evmasm::Instruction::JUMPF && - opcode != evmasm::Instruction::DUPN && - opcode != evmasm::Instruction::SWAPN && - opcode != evmasm::Instruction::RETF && - _evmVersion.hasOpcode(opcode, _eofVersion) && - !prevRandaoException(name) - ) - builtins.emplace_back(createEVMFunction(_evmVersion, name, opcode)); - else - builtins.emplace_back(std::nullopt); - } - - auto const createIfObjectAccess = [_objectAccess]( +BuiltinFunctionForEVM createFunction( std::string const& _name, size_t _params, size_t _returns, @@ -243,64 +184,200 @@ std::vector> createBuiltins(langutil::EVMVe ControlFlowSideEffects _controlFlowSideEffects, std::vector> _literalArguments, std::function _generateCode - ) -> std::optional + ) +{ + yulAssert(_literalArguments.size() == _params || _literalArguments.empty(), ""); + + BuiltinFunctionForEVM f; + f.name = _name; + f.numParameters = _params; + f.numReturns = _returns; + f.sideEffects = _sideEffects; + f.controlFlowSideEffects = 
_controlFlowSideEffects; + f.literalArguments = std::move(_literalArguments); + f.isMSize = false; + f.instruction = {}; + f.generateCode = std::move(_generateCode); + return f; +} + +class BuiltinFunctionCollection +{ + static size_t constexpr instructionBit = 0; + static size_t constexpr replacedInstructionBit = 1; + static size_t constexpr objectAccessBit = 2; + static size_t constexpr requiresEOFBit = 3; + static size_t constexpr requiresNonEOFBit = 4; +public: + struct BuiltinDescription { - if (!_objectAccess) - return std::nullopt; - return createFunction(_name, _params, _returns, _sideEffects, _controlFlowSideEffects, std::move(_literalArguments), std::move(_generateCode)); + /// whether the corresponding evm builtin function is an instruction builtin + bool instruction() const { return value.test(instructionBit); } + /// whether the corresponding evm builtin has been replaced by another builtin, ie, should be skipped + bool replaced() const { return value.test(replacedInstructionBit); } + /// if true, the evm builtin function is only valid when object access is given + bool requiresObjectAccess() const { return value.test(objectAccessBit); } + /// if true, the evm builtin function is only valid if EOF is enabled + bool requiresEOF() const { return value.test(requiresEOFBit); } + /// if true, the evm builtin function is only valid if EOF is not enabled + bool requiresNonEOF() const { return value.test(requiresNonEOFBit); } + + BuiltinDescription operator|(BuiltinDescription const& _other) const + { + return { value | _other.value }; + } + + std::bitset<5> value; }; - builtins.emplace_back(createIfObjectAccess("linkersymbol", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - yulAssert(_call.arguments.size() == 1, ""); - Expression const& arg = _call.arguments.front(); - _assembly.appendLinkerSymbol(formatLiteral(std::get(arg))); - })); - 
builtins.emplace_back(createIfObjectAccess( - "memoryguard", - 1, - 1, - SideEffects{}, - ControlFlowSideEffects{}, - {LiteralKind::Number}, - []( - FunctionCall const& _call, + static std::vector> const& builtins() + { + static BuiltinFunctionCollection const instance; + return instance.m_builtinFunctions; + } + +private: + static BuiltinDescription constexpr instruction{1 << instructionBit}; + static BuiltinDescription constexpr replaced{1 << replacedInstructionBit}; + static BuiltinDescription constexpr objectAccess{1 << objectAccessBit}; + static BuiltinDescription constexpr requiresEOF{1 << requiresEOFBit}; + static BuiltinDescription constexpr requiresNonEOF{1 << requiresNonEOFBit}; + + BuiltinFunctionCollection() + { + for (auto const& [name, opcode]: evmasm::c_instructions) + { + if (evmasm::SemanticInformation::isSwapInstruction(opcode) || evmasm::SemanticInformation::isDupInstruction(opcode)) + continue; + + if ( + opcode != evmasm::Instruction::DATALOADN && + opcode != evmasm::Instruction::EOFCREATE && + opcode != evmasm::Instruction::RETURNCONTRACT + ) + { + // difficulty was replaced by prevrandao after london + if (opcode == evmasm::Instruction::PREVRANDAO && name == "DIFFICULTY") + m_builtinFunctions.emplace_back(instruction, instructionBuiltin(opcode, langutil::EVMVersion::london())); + else + m_builtinFunctions.emplace_back(instruction, instructionBuiltin(opcode, langutil::EVMVersion::current())); + } + else + // these opcodes are replaced by explicit builtin functions + m_builtinFunctions.emplace_back(instruction | replaced, BuiltinFunctionForEVM{}); + } + + m_builtinFunctions.emplace_back(objectAccess, linkersymbolBuiltin()); + m_builtinFunctions.emplace_back(objectAccess, memoryguardBuiltin()); + + m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, datasizeBuiltin()); + m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, dataoffsetBuiltin()); + m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, 
datacopyBuiltin()); + m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, setimmutableBuiltin()); + m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, loadimmutableBuiltin()); + + m_builtinFunctions.emplace_back(objectAccess | requiresEOF, auxdataloadnBuiltin()); + m_builtinFunctions.emplace_back(objectAccess | requiresEOF, eofcreateBuiltin()); + m_builtinFunctions.emplace_back(objectAccess | requiresEOF, returncontractBuiltin()); + + yulAssert( + ranges::all_of(m_builtinFunctions, [](std::tuple const& _builtin) { + return std::get<1>(_builtin).name.substr(0, "verbatim_"s.size()) != "verbatim_"; + }), + "Builtin functions besides verbatim should not start with the verbatim_ prefix." + ); + } + + static BuiltinFunctionForEVM instructionBuiltin(evmasm::Instruction const& _instruction, langutil::EVMVersion const& _evmVersion) + { + evmasm::InstructionInfo const info = evmasm::instructionInfo(_instruction, _evmVersion); + BuiltinFunctionForEVM f; + f.name = toLower(info.name); + f.numParameters = static_cast(info.args); + f.numReturns = static_cast(info.ret); + f.sideEffects = EVMDialect::sideEffectsOfInstruction(_instruction); + f.controlFlowSideEffects = ControlFlowSideEffects::fromInstruction(_instruction); + f.isMSize = _instruction == evmasm::Instruction::MSIZE; + f.literalArguments.clear(); + f.instruction = _instruction; + f.generateCode = [_instruction]( + FunctionCall const&, AbstractAssembly& _assembly, BuiltinContext& - ) { - yulAssert(_call.arguments.size() == 1, ""); - Literal const* literal = std::get_if(&_call.arguments.front()); - yulAssert(literal, ""); - _assembly.appendConstant(literal->value.value()); - }) - ); - if (!_eofVersion.has_value()) + ) + { + _assembly.appendInstruction(_instruction); + }; + return f; + } + + static BuiltinFunctionForEVM linkersymbolBuiltin() { - builtins.emplace_back(createIfObjectAccess("datasize", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, []( - FunctionCall const& 
_call, - AbstractAssembly& _assembly, - BuiltinContext& _context - ) { - yulAssert(_context.currentObject, "No object available."); - yulAssert(_call.arguments.size() == 1, ""); - Expression const& arg = _call.arguments.front(); - YulName const dataName (formatLiteral(std::get(arg))); - if (_context.currentObject->name == dataName.str()) - _assembly.appendAssemblySize(); - else - { - std::vector subIdPath = - _context.subIDs.count(dataName.str()) == 0 ? - _context.currentObject->pathToSubObject(dataName.str()) : - std::vector{_context.subIDs.at(dataName.str())}; - yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); - _assembly.appendDataSize(subIdPath); + return createFunction( + "linkersymbol", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::String}, + [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext&) { + yulAssert(_call.arguments.size() == 1, ""); + Expression const& arg = _call.arguments.front(); + _assembly.appendLinkerSymbol(formatLiteral(std::get(arg))); } - })); - builtins.emplace_back(createIfObjectAccess("dataoffset", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, []( + ); + } + + static BuiltinFunctionForEVM memoryguardBuiltin() + { + return createFunction( + "memoryguard", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::Number}, + [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext&) { + yulAssert(_call.arguments.size() == 1, ""); + Literal const* literal = std::get_if(&_call.arguments.front()); + yulAssert(literal, ""); + _assembly.appendConstant(literal->value.value()); + } + ); + } + + static BuiltinFunctionForEVM datasizeBuiltin() + { + return createFunction( + "datasize", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::String}, + [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext& _context) { + yulAssert(_context.currentObject, "No object 
available."); + yulAssert(_call.arguments.size() == 1, ""); + Expression const& arg = _call.arguments.front(); + YulName const dataName (formatLiteral(std::get(arg))); + if (_context.currentObject->name == dataName.str()) + _assembly.appendAssemblySize(); + else + { + std::vector subIdPath = + _context.subIDs.count(dataName.str()) == 0 ? + _context.currentObject->pathToSubObject(dataName.str()) : + std::vector{_context.subIDs.at(dataName.str())}; + yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); + _assembly.appendDataSize(subIdPath); + } + } + ); + } + + static BuiltinFunctionForEVM dataoffsetBuiltin() + { + return createFunction("dataoffset", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, []( FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext& _context @@ -320,8 +397,12 @@ std::vector> createBuiltins(langutil::EVMVe yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); _assembly.appendDataOffset(subIdPath); } - })); - builtins.emplace_back(createIfObjectAccess( + }); + } + + static BuiltinFunctionForEVM datacopyBuiltin() + { + return createFunction( "datacopy", 3, 0, @@ -335,8 +416,12 @@ std::vector> createBuiltins(langutil::EVMVe ) { _assembly.appendInstruction(evmasm::Instruction::CODECOPY); } - )); - builtins.emplace_back(createIfObjectAccess( + ); + } + + static BuiltinFunctionForEVM setimmutableBuiltin() + { + return createFunction( "setimmutable", 3, 0, @@ -362,8 +447,12 @@ std::vector> createBuiltins(langutil::EVMVe auto const identifier = (formatLiteral(std::get(_call.arguments[1]))); _assembly.appendImmutableAssignment(identifier); } - )); - builtins.emplace_back(createIfObjectAccess( + ); + } + + static BuiltinFunctionForEVM loadimmutableBuiltin() + { + return createFunction( "loadimmutable", 1, 1, @@ -378,86 +467,131 @@ std::vector> createBuiltins(langutil::EVMVe yulAssert(_call.arguments.size() == 1, ""); 
_assembly.appendImmutable(formatLiteral(std::get(_call.arguments.front()))); } - )); + ); + } + + static BuiltinFunctionForEVM auxdataloadnBuiltin() + { + return createFunction( + "auxdataloadn", + 1, + 1, + EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::DATALOADN), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::DATALOADN), + {LiteralKind::Number}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + yulAssert(_call.arguments.size() == 1); + Literal const* literal = std::get_if(&_call.arguments.front()); + yulAssert(literal, ""); + yulAssert(literal->value.value() <= std::numeric_limits::max()); + _assembly.appendAuxDataLoadN(static_cast(literal->value.value())); + } + ); + } + + static BuiltinFunctionForEVM eofcreateBuiltin() + { + return createFunction( + "eofcreate", + 5, + 1, + EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::EOFCREATE), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::EOFCREATE), + {LiteralKind::String, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& context + ) { + yulAssert(_call.arguments.size() == 5); + Literal const* literal = std::get_if(&_call.arguments.front()); + auto const formattedLiteral = formatLiteral(*literal); + yulAssert(!util::contains(formattedLiteral, '.')); + auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral); + yulAssert(containerID != nullptr); + yulAssert(*containerID <= std::numeric_limits::max()); + _assembly.appendEOFCreate(static_cast(*containerID)); + } + ); } - else // EOF context + + static BuiltinFunctionForEVM returncontractBuiltin() { - if (_objectAccess) + return createFunction( + "returncontract", + 3, + 0, + EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::RETURNCONTRACT), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::RETURNCONTRACT), + {LiteralKind::String, std::nullopt, 
std::nullopt}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& context + ) { + yulAssert(_call.arguments.size() == 3); + Literal const* literal = std::get_if(&_call.arguments.front()); + yulAssert(literal); + auto const formattedLiteral = formatLiteral(*literal); + yulAssert(!util::contains(formattedLiteral, '.')); + auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral); + yulAssert(containerID != nullptr); + yulAssert(*containerID <= std::numeric_limits::max()); + _assembly.appendReturnContract(static_cast(*containerID)); + } + ); + } + + std::vector> m_builtinFunctions; +}; + +/// Make sure to only add builtins in a way that is consistent over EVM versions. If the order depends on the +/// EVM version - which can easily happen using conditionals -, different dialects' builtin handles +/// become inherently incompatible. +std::vector createBuiltins(langutil::EVMVersion _evmVersion, std::optional _eofVersion, bool _objectAccess) +{ + std::vector builtins; + builtins.reserve(BuiltinFunctionCollection::builtins().size()); + + for (auto const& [description, builtin]: BuiltinFunctionCollection::builtins()) + { + bool builtinShouldBeAdded = true; + if (description.instruction()) { - builtins.emplace_back(createFunction( - "auxdataloadn", - 1, - 1, - EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::DATALOADN), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::DATALOADN), - {LiteralKind::Number}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - yulAssert(_call.arguments.size() == 1); - Literal const* literal = std::get_if(&_call.arguments.front()); - yulAssert(literal, ""); - yulAssert(literal->value.value() <= std::numeric_limits::max()); - _assembly.appendAuxDataLoadN(static_cast(literal->value.value())); - } - )); - - builtins.emplace_back(createFunction( - "eofcreate", - 5, - 1, - 
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::EOFCREATE), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::EOFCREATE), - {LiteralKind::String, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& context - ) { - yulAssert(_call.arguments.size() == 5); - Literal const* literal = std::get_if(&_call.arguments.front()); - auto const formattedLiteral = formatLiteral(*literal); - yulAssert(!util::contains(formattedLiteral, '.')); - auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral); - yulAssert(containerID != nullptr); - yulAssert(*containerID <= std::numeric_limits::max()); - _assembly.appendEOFCreate(static_cast(*containerID)); - } - )); - - builtins.emplace_back(createFunction( - "returncontract", - 3, - 0, - EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::RETURNCONTRACT), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::RETURNCONTRACT), - {LiteralKind::String, std::nullopt, std::nullopt}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& context - ) { - yulAssert(_call.arguments.size() == 3); - Literal const* literal = std::get_if(&_call.arguments.front()); - yulAssert(literal); - auto const formattedLiteral = formatLiteral(*literal); - yulAssert(!util::contains(formattedLiteral, '.')); - auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral); - yulAssert(containerID != nullptr); - yulAssert(*containerID <= std::numeric_limits::max()); - _assembly.appendReturnContract(static_cast(*containerID)); - } - )); + if (description.replaced()) + builtinShouldBeAdded = false; + else + { + // Exclude prevrandao as builtin for VMs before paris and difficulty for VMs after paris. 
+ auto prevRandaoException = [&](std::string_view const _instrName) -> bool + { + return (_instrName == "prevrandao" && _evmVersion < langutil::EVMVersion::paris()) || (_instrName == "difficulty" && _evmVersion >= langutil::EVMVersion::paris()); + }; + + yulAssert(builtin.instruction); + auto const& _opcode = *builtin.instruction; + builtinShouldBeAdded = + !isLowLevelControlFlowInstruction(_opcode) && + !isLowLevelStackManipulationInstruction(_opcode) && + _evmVersion.hasOpcode(_opcode, _eofVersion) && + !prevRandaoException(builtin.name); + } } + + builtinShouldBeAdded &= !description.requiresObjectAccess() || _objectAccess; + builtinShouldBeAdded &= !description.requiresEOF() || _eofVersion.has_value(); + builtinShouldBeAdded &= !description.requiresNonEOF() || !_eofVersion.has_value(); + + if (builtinShouldBeAdded) + builtins.emplace_back(&builtin); + else + builtins.emplace_back(nullptr); } - yulAssert( - ranges::all_of(builtins, [](std::optional const& _builtinFunction){ - return !_builtinFunction || _builtinFunction->name.substr(0, "verbatim_"s.size()) != "verbatim_"; - }), - "Builtin functions besides verbatim should not start with the verbatim_ prefix." 
- ); + return builtins; } @@ -482,14 +616,14 @@ EVMDialect::EVMDialect(langutil::EVMVersion _evmVersion, std::optional // ids are offset by the maximum number of verbatim functions m_builtinFunctionsByName[maybeBuiltin->name] = BuiltinHandle{index + verbatimIDOffset}; - m_discardFunction = findBuiltin("pop"); - m_equalityFunction = findBuiltin("eq"); - m_booleanNegationFunction = findBuiltin("iszero"); - m_memoryStoreFunction = findBuiltin("mstore"); - m_memoryLoadFunction = findBuiltin("mload"); - m_storageStoreFunction = findBuiltin("sstore"); - m_storageLoadFunction = findBuiltin("sload"); - m_hashFunction = findBuiltin("keccak256"); + m_discardFunction = EVMDialect::findBuiltin("pop"); + m_equalityFunction = EVMDialect::findBuiltin("eq"); + m_booleanNegationFunction = EVMDialect::findBuiltin("iszero"); + m_memoryStoreFunction = EVMDialect::findBuiltin("mstore"); + m_memoryLoadFunction = EVMDialect::findBuiltin("mload"); + m_storageStoreFunction = EVMDialect::findBuiltin("sstore"); + m_storageLoadFunction = EVMDialect::findBuiltin("sload"); + m_hashFunction = EVMDialect::findBuiltin("keccak256"); m_auxiliaryBuiltinHandles.add = EVMDialect::findBuiltin("add"); m_auxiliaryBuiltinHandles.exp = EVMDialect::findBuiltin("exp"); @@ -529,8 +663,8 @@ BuiltinFunctionForEVM const& EVMDialect::builtin(BuiltinHandle const& _handle) c } yulAssert(_handle.id - verbatimIDOffset < m_functions.size()); - auto const& maybeBuiltin = m_functions[_handle.id - verbatimIDOffset]; - yulAssert(maybeBuiltin.has_value()); + auto const* maybeBuiltin = m_functions[_handle.id - verbatimIDOffset]; + yulAssert(maybeBuiltin); return *maybeBuiltin; } @@ -563,22 +697,7 @@ EVMDialect const& EVMDialect::strictAssemblyForEVMObjects(langutil::EVMVersion _ SideEffects EVMDialect::sideEffectsOfInstruction(evmasm::Instruction _instruction) { - auto translate = [](evmasm::SemanticInformation::Effect _e) -> SideEffects::Effect - { - return static_cast(_e); - }; - - return SideEffects{ - 
evmasm::SemanticInformation::movable(_instruction), - evmasm::SemanticInformation::movableApartFromEffects(_instruction), - evmasm::SemanticInformation::canBeRemoved(_instruction), - evmasm::SemanticInformation::canBeRemovedIfNoMSize(_instruction), - true, // cannotLoop - translate(evmasm::SemanticInformation::otherState(_instruction)), - translate(evmasm::SemanticInformation::storage(_instruction)), - translate(evmasm::SemanticInformation::memory(_instruction)), - translate(evmasm::SemanticInformation::transientStorage(_instruction)), - }; + return ranges::views::keys(m_builtinFunctionsByName) | ranges::to; } BuiltinFunctionForEVM EVMDialect::createVerbatimFunctionFromHandle(BuiltinHandle const& _handle) diff --git a/libyul/backends/evm/EVMDialect.h b/libyul/backends/evm/EVMDialect.h index 0440daade1ce..73136ef6e30b 100644 --- a/libyul/backends/evm/EVMDialect.h +++ b/libyul/backends/evm/EVMDialect.h @@ -125,7 +125,7 @@ class EVMDialect: public Dialect langutil::EVMVersion const m_evmVersion; std::optional m_eofVersion; std::unordered_map m_builtinFunctionsByName; - std::vector> m_functions; + std::vector m_functions; std::array, verbatimIDOffset> mutable m_verbatimFunctions{}; std::set> m_reserved; diff --git a/libyul/backends/evm/NoOutputAssembly.cpp b/libyul/backends/evm/NoOutputAssembly.cpp index 78025265f342..b5b168b14ea8 100644 --- a/libyul/backends/evm/NoOutputAssembly.cpp +++ b/libyul/backends/evm/NoOutputAssembly.cpp @@ -207,9 +207,44 @@ void NoOutputAssembly::appendReturnContract(ContainerID) NoOutputEVMDialect::NoOutputEVMDialect(EVMDialect const& _copyFrom): EVMDialect(_copyFrom.evmVersion(), _copyFrom.eofVersion(), _copyFrom.providesObjectAccess()) { - for (auto& fun: m_functions) - if (fun) - modifyBuiltinToNoOutput(*fun); + m_functions = [&] + { + // save the modified functions here + static std::vector const noOutputBuiltins = [] + { + std::vector modifiedBuiltins; + modifiedBuiltins.reserve(allBuiltins().functions().size()); + + for (auto const& 
[_, builtin]: allBuiltins().functions()) + { + auto noOutputFunction = builtin; + modifyBuiltinToNoOutput(noOutputFunction); + modifiedBuiltins.push_back(std::move(noOutputFunction)); + } + + return modifiedBuiltins; + }(); + + std::vector result; + result.reserve(m_functions.size()); + for (auto const* builtinFunction: m_functions) + { + if (builtinFunction) + { + auto it = noOutputBuiltins.find(builtinFunction); + if (it == noOutputBuiltins.end()) + { + auto noOutputFunction = *builtinFunction; + modifyBuiltinToNoOutput(noOutputFunction); + it = noOutputBuiltins.emplace(builtinFunction, std::move(noOutputFunction)).first; + } + result.emplace_back(&it->second); + } + else + result.emplace_back(nullptr); + } + return result; + }(); } BuiltinFunctionForEVM const& NoOutputEVMDialect::builtin(BuiltinHandle const& _handle) const From cdd83f9df11c410ff8974892ee2b37d0c55f25d2 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Mon, 28 Apr 2025 10:48:06 +0200 Subject: [PATCH 3/6] Separate out builtin function collection into its own class This makes for a better separation between the declarations and the logic that defines builtins. 
# Conflicts: # libyul/backends/evm/EVMDialect.cpp --- libyul/CMakeLists.txt | 2 + libyul/backends/evm/EVMBuiltins.cpp | 416 +++++++++++++++++++++ libyul/backends/evm/EVMBuiltins.h | 118 ++++++ libyul/backends/evm/EVMDialect.cpp | 444 ++--------------------- libyul/backends/evm/EVMDialect.h | 36 +- libyul/backends/evm/NoOutputAssembly.cpp | 14 +- 6 files changed, 571 insertions(+), 459 deletions(-) create mode 100644 libyul/backends/evm/EVMBuiltins.cpp create mode 100644 libyul/backends/evm/EVMBuiltins.h diff --git a/libyul/CMakeLists.txt b/libyul/CMakeLists.txt index fc3302a82332..149c2fb2b75a 100644 --- a/libyul/CMakeLists.txt +++ b/libyul/CMakeLists.txt @@ -58,6 +58,8 @@ add_library(yul backends/evm/ControlFlowGraphBuilder.h backends/evm/EthAssemblyAdapter.cpp backends/evm/EthAssemblyAdapter.h + backends/evm/EVMBuiltins.cpp + backends/evm/EVMBuiltins.h backends/evm/EVMCodeTransform.cpp backends/evm/EVMCodeTransform.h backends/evm/EVMDialect.cpp diff --git a/libyul/backends/evm/EVMBuiltins.cpp b/libyul/backends/evm/EVMBuiltins.cpp new file mode 100644 index 000000000000..b68e7f39154b --- /dev/null +++ b/libyul/backends/evm/EVMBuiltins.cpp @@ -0,0 +1,416 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . 
+*/ +// SPDX-License-Identifier: GPL-3.0 + +#include + +#include +#include +#include + +#include + +#include + +#include + +using namespace solidity; +using namespace solidity::yul; + +namespace +{ + +BuiltinFunctionForEVM createFunction( + std::string const& _name, + size_t _params, + size_t _returns, + SideEffects _sideEffects, + ControlFlowSideEffects _controlFlowSideEffects, + std::vector> _literalArguments, + std::function _generateCode + ) +{ + yulAssert(_literalArguments.size() == _params || _literalArguments.empty(), ""); + + BuiltinFunctionForEVM f; + f.name = _name; + f.numParameters = _params; + f.numReturns = _returns; + f.sideEffects = _sideEffects; + f.controlFlowSideEffects = _controlFlowSideEffects; + f.literalArguments = std::move(_literalArguments); + f.isMSize = false; + f.instruction = {}; + f.generateCode = std::move(_generateCode); + return f; +} + +BuiltinFunctionForEVM instructionBuiltin(evmasm::Instruction const& _instruction, langutil::EVMVersion const& _evmVersion) +{ + evmasm::InstructionInfo const info = evmasm::instructionInfo(_instruction, _evmVersion); + BuiltinFunctionForEVM f; + f.name = util::toLower(info.name); + f.numParameters = static_cast(info.args); + f.numReturns = static_cast(info.ret); + f.sideEffects = EVMBuiltins::sideEffectsOfInstruction(_instruction); + f.controlFlowSideEffects = ControlFlowSideEffects::fromInstruction(_instruction); + f.isMSize = _instruction == evmasm::Instruction::MSIZE; + f.literalArguments.clear(); + f.instruction = _instruction; + f.generateCode = [_instruction]( + FunctionCall const&, + AbstractAssembly& _assembly, + BuiltinContext& + ) + { + _assembly.appendInstruction(_instruction); + }; + return f; +} + +BuiltinFunctionForEVM linkersymbolBuiltin() +{ + return createFunction( + "linkersymbol", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::String}, + [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext&) { + yulAssert(_call.arguments.size() == 1, 
""); + Expression const& arg = _call.arguments.front(); + _assembly.appendLinkerSymbol(formatLiteral(std::get(arg))); + } + ); +} + +BuiltinFunctionForEVM memoryguardBuiltin() +{ + return createFunction( + "memoryguard", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::Number}, + [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext&) { + yulAssert(_call.arguments.size() == 1, ""); + Literal const* literal = std::get_if(&_call.arguments.front()); + yulAssert(literal, ""); + _assembly.appendConstant(literal->value.value()); + } + ); +} + +BuiltinFunctionForEVM datasizeBuiltin() +{ + return createFunction( + "datasize", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::String}, + [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext& _context) { + yulAssert(_context.currentObject, "No object available."); + yulAssert(_call.arguments.size() == 1, ""); + Expression const& arg = _call.arguments.front(); + YulName const dataName (formatLiteral(std::get(arg))); + if (_context.currentObject->name == dataName.str()) + _assembly.appendAssemblySize(); + else + { + std::vector subIdPath = + _context.subIDs.count(dataName.str()) == 0 ? 
+ _context.currentObject->pathToSubObject(dataName.str()) : + std::vector{_context.subIDs.at(dataName.str())}; + yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); + _assembly.appendDataSize(subIdPath); + } + } + ); +} + +BuiltinFunctionForEVM dataoffsetBuiltin() +{ + return createFunction("dataoffset", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& _context + ) { + yulAssert(_context.currentObject, "No object available."); + yulAssert(_call.arguments.size() == 1, ""); + Expression const& arg = _call.arguments.front(); + YulName const dataName (formatLiteral(std::get(arg))); + if (_context.currentObject->name == dataName.str()) + _assembly.appendConstant(0); + else + { + std::vector subIdPath = + _context.subIDs.count(dataName.str()) == 0 ? + _context.currentObject->pathToSubObject(dataName.str()) : + std::vector{_context.subIDs.at(dataName.str())}; + yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); + _assembly.appendDataOffset(subIdPath); + } + }); +} + +BuiltinFunctionForEVM datacopyBuiltin() +{ + return createFunction( + "datacopy", + 3, + 0, + EVMBuiltins::sideEffectsOfInstruction(evmasm::Instruction::CODECOPY), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::CODECOPY), + {}, + []( + FunctionCall const&, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + _assembly.appendInstruction(evmasm::Instruction::CODECOPY); + } + ); +} + +BuiltinFunctionForEVM setimmutableBuiltin() +{ + return createFunction( + "setimmutable", + 3, + 0, + SideEffects{ + false, // movable + false, // movableApartFromEffects + false, // canBeRemoved + false, // canBeRemovedIfNoMSize + true, // cannotLoop + SideEffects::None, // otherState + SideEffects::None, // storage + SideEffects::Write, // memory + SideEffects::None // transientStorage + }, + ControlFlowSideEffects{}, +
{std::nullopt, LiteralKind::String, std::nullopt}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + yulAssert(_call.arguments.size() == 3, ""); + auto const identifier = (formatLiteral(std::get(_call.arguments[1]))); + _assembly.appendImmutableAssignment(identifier); + } + ); +} + +BuiltinFunctionForEVM loadimmutableBuiltin() +{ + return createFunction( + "loadimmutable", + 1, + 1, + SideEffects{}, + ControlFlowSideEffects{}, + {LiteralKind::String}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + yulAssert(_call.arguments.size() == 1, ""); + _assembly.appendImmutable(formatLiteral(std::get(_call.arguments.front()))); + } + ); +} + +BuiltinFunctionForEVM auxdataloadnBuiltin() +{ + return createFunction( + "auxdataloadn", + 1, + 1, + EVMBuiltins::sideEffectsOfInstruction(evmasm::Instruction::DATALOADN), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::DATALOADN), + {LiteralKind::Number}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + yulAssert(_call.arguments.size() == 1); + Literal const* literal = std::get_if(&_call.arguments.front()); + yulAssert(literal, ""); + yulAssert(literal->value.value() <= std::numeric_limits::max()); + _assembly.appendAuxDataLoadN(static_cast(literal->value.value())); + } + ); +} + +BuiltinFunctionForEVM eofcreateBuiltin() +{ + return createFunction( + "eofcreate", + 5, + 1, + EVMBuiltins::sideEffectsOfInstruction(evmasm::Instruction::EOFCREATE), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::EOFCREATE), + {LiteralKind::String, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& context + ) { + yulAssert(_call.arguments.size() == 5); + Literal const* literal = std::get_if(&_call.arguments.front()); + // std::get_if yields nullptr when the argument is not a literal; check before dereferencing. + yulAssert(literal); + auto const formattedLiteral = formatLiteral(*literal); +
yulAssert(!util::contains(formattedLiteral, '.')); + auto const* containerID = util::valueOrNullptr(context.subIDs, formattedLiteral); + yulAssert(containerID != nullptr); + yulAssert(*containerID <= std::numeric_limits::max()); + _assembly.appendEOFCreate(static_cast(*containerID)); + } + ); +} + +BuiltinFunctionForEVM returncontractBuiltin() +{ + return createFunction( + "returncontract", + 3, + 0, + EVMBuiltins::sideEffectsOfInstruction(evmasm::Instruction::RETURNCONTRACT), + ControlFlowSideEffects::fromInstruction(evmasm::Instruction::RETURNCONTRACT), + {LiteralKind::String, std::nullopt, std::nullopt}, + []( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& context + ) { + yulAssert(_call.arguments.size() == 3); + Literal const* literal = std::get_if(&_call.arguments.front()); + yulAssert(literal); + auto const formattedLiteral = formatLiteral(*literal); + yulAssert(!util::contains(formattedLiteral, '.')); + auto const* containerID = util::valueOrNullptr(context.subIDs, formattedLiteral); + yulAssert(containerID != nullptr); + yulAssert(*containerID <= std::numeric_limits::max()); + _assembly.appendReturnContract(static_cast(*containerID)); + } + ); +} + +} + +EVMBuiltins::EVMBuiltins() +{ + for (auto const& [name, opcode]: evmasm::c_instructions) + { + if ( + evmasm::SemanticInformation::isSwapInstruction(opcode) || + evmasm::SemanticInformation::isDupInstruction(opcode) + ) + continue; + + // difficulty was replaced by prevrandao after london + if (opcode == evmasm::Instruction::PREVRANDAO && name == "DIFFICULTY") + m_functions.emplace_back(instruction, instructionBuiltin(opcode, langutil::EVMVersion::london())); + else + m_functions.emplace_back(instruction, instructionBuiltin(opcode, langutil::EVMVersion::current())); + + // these are replaced by 'proper' builtin functions + if ( + opcode == evmasm::Instruction::DATALOADN || + opcode == evmasm::Instruction::EOFCREATE || + opcode == evmasm::Instruction::RETURNCONTRACT + ) + 
std::get<0>(m_functions.back()) |= replaced; + } + + m_functions.emplace_back(objectAccess, linkersymbolBuiltin()); + m_functions.emplace_back(objectAccess, memoryguardBuiltin()); + + m_functions.emplace_back(objectAccess | requiresNonEOF, datasizeBuiltin()); + m_functions.emplace_back(objectAccess | requiresNonEOF, dataoffsetBuiltin()); + m_functions.emplace_back(objectAccess | requiresNonEOF, datacopyBuiltin()); + m_functions.emplace_back(objectAccess | requiresNonEOF, setimmutableBuiltin()); + m_functions.emplace_back(objectAccess | requiresNonEOF, loadimmutableBuiltin()); + + m_functions.emplace_back(objectAccess | requiresEOF, auxdataloadnBuiltin()); + m_functions.emplace_back(objectAccess | requiresEOF, eofcreateBuiltin()); + m_functions.emplace_back(objectAccess | requiresEOF, returncontractBuiltin()); + + using namespace std::string_literals; + yulAssert( + ranges::all_of( + m_functions, + [](std::tuple const& _builtin) + { return std::get<1>(_builtin).name.substr(0, "verbatim_"s.size()) != "verbatim_"; }), + "Builtin functions besides verbatim should not start with the verbatim_ prefix." + ); +} + +BuiltinFunctionForEVM EVMBuiltins::createVerbatimFunction(size_t const _arguments, size_t const _returnVariables) +{ + BuiltinFunctionForEVM builtinFunction = createFunction( + "verbatim_" + std::to_string(_arguments) + "i_" + std::to_string(_returnVariables) + "o", + 1 + _arguments, + _returnVariables, + SideEffects::worst(), + ControlFlowSideEffects::worst(), // Worst control flow side effects because verbatim can do anything. 
+ std::vector>{LiteralKind::String} + std::vector>(_arguments), + [=]( + FunctionCall const& _call, + AbstractAssembly& _assembly, + BuiltinContext& + ) { + yulAssert(_call.arguments.size() == (1 + _arguments), ""); + Expression const& bytecode = _call.arguments.front(); + + _assembly.appendVerbatim( + util::asBytes(formatLiteral(std::get(bytecode))), + _arguments, + _returnVariables + ); + } + ); + builtinFunction.isMSize = true; + return builtinFunction; +} + +SideEffects EVMBuiltins::sideEffectsOfInstruction(evmasm::Instruction _instruction) +{ + auto translate = [](evmasm::SemanticInformation::Effect _e) -> SideEffects::Effect + { + return static_cast(_e); + }; + + return SideEffects{ + evmasm::SemanticInformation::movable(_instruction), + evmasm::SemanticInformation::movableApartFromEffects(_instruction), + evmasm::SemanticInformation::canBeRemoved(_instruction), + evmasm::SemanticInformation::canBeRemovedIfNoMSize(_instruction), + true, // cannotLoop + translate(evmasm::SemanticInformation::otherState(_instruction)), + translate(evmasm::SemanticInformation::storage(_instruction)), + translate(evmasm::SemanticInformation::memory(_instruction)), + translate(evmasm::SemanticInformation::transientStorage(_instruction)), + }; +} diff --git a/libyul/backends/evm/EVMBuiltins.h b/libyul/backends/evm/EVMBuiltins.h new file mode 100644 index 000000000000..d864746c55e8 --- /dev/null +++ b/libyul/backends/evm/EVMBuiltins.h @@ -0,0 +1,118 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +// SPDX-License-Identifier: GPL-3.0 + +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace solidity::yul +{ + +class Object; + +/// Context used during code generation. +struct BuiltinContext +{ + Object const* currentObject = nullptr; + /// Mapping from named objects to abstract assembly sub IDs. + std::map subIDs; + + std::map functionIDs; +}; + +struct BuiltinFunctionForEVM: public BuiltinFunction +{ + std::optional instruction; + /// Function to generate code for the given function call and append it to the abstract + /// assembly. Expects all non-literal arguments of the call to be on stack in reverse order + /// (i.e. right-most argument pushed first). + /// Expects the caller to set the source location. + std::function generateCode; +}; + +/// Collection of all possible EVM builtin functions. +/// Each builtin can have one (or multiple) scopes, which define whether, e.g., it requires object access. +/// Using this class as single source of truth for builtin functions makes sure that these are consistent over +/// EVM dialects. If the order were to depend on the EVM dialect - which can easily happen using conditionals -, +/// different dialects' builtin handles become inherently incompatible. 
+class EVMBuiltins +{ + static std::size_t constexpr instructionBit = 0; + static std::size_t constexpr replacedInstructionBit = 1; + static std::size_t constexpr objectAccessBit = 2; + static std::size_t constexpr requiresEOFBit = 3; + static std::size_t constexpr requiresNonEOFBit = 4; +public: + struct Scopes + { + /// Whether the corresponding EVM builtin function is an instruction builtin. + bool instruction() const { return value.test(instructionBit); } + /// Whether the corresponding EVM builtin has been replaced by another builtin, i.e., should be skipped. + bool replaced() const { return value.test(replacedInstructionBit); } + /// If true, the EVM builtin function is only valid when object access is given. + bool requiresObjectAccess() const { return value.test(objectAccessBit); } + /// If true, the EVM builtin function is only valid if EOF is enabled. + bool requiresEOF() const { return value.test(requiresEOFBit); } + /// If true, the EVM builtin function is only valid if EOF is not enabled. + bool requiresNonEOF() const { return value.test(requiresNonEOFBit); } + + Scopes operator|(Scopes const& _other) const + { + Scopes result = *this; + result |= _other; + return result; + } + + Scopes& operator|=(Scopes const& _other) + { + value |= _other.value; + return *this; + } + + std::bitset<5> value; + }; + + EVMBuiltins(); + + std::vector> const& functions() const { return m_functions; } + + /// Creates a verbatim builtin function. These are not part of the usual builtin functions collection and + /// must be cached in the dialect creating them. 
+ static BuiltinFunctionForEVM createVerbatimFunction(size_t _arguments, size_t _returnVariables); + static SideEffects sideEffectsOfInstruction(evmasm::Instruction _instruction); +private: + static Scopes constexpr instruction{1 << instructionBit}; + static Scopes constexpr replaced{1 << replacedInstructionBit}; + static Scopes constexpr objectAccess{1 << objectAccessBit}; + static Scopes constexpr requiresEOF{1 << requiresEOFBit}; + static Scopes constexpr requiresNonEOF{1 << requiresNonEOFBit}; + + std::vector> m_functions; +}; + +} diff --git a/libyul/backends/evm/EVMDialect.cpp b/libyul/backends/evm/EVMDialect.cpp index 87cd876ae254..c903b9c4dae0 100644 --- a/libyul/backends/evm/EVMDialect.cpp +++ b/libyul/backends/evm/EVMDialect.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include @@ -176,393 +175,22 @@ std::set> createReservedIdentifiers(langutil::EVMVersio return reserved; } -BuiltinFunctionForEVM createFunction( - std::string const& _name, - size_t _params, - size_t _returns, - SideEffects _sideEffects, - ControlFlowSideEffects _controlFlowSideEffects, - std::vector> _literalArguments, - std::function _generateCode - ) -{ - yulAssert(_literalArguments.size() == _params || _literalArguments.empty(), ""); - - BuiltinFunctionForEVM f; - f.name = _name; - f.numParameters = _params; - f.numReturns = _returns; - f.sideEffects = _sideEffects; - f.controlFlowSideEffects = _controlFlowSideEffects; - f.literalArguments = std::move(_literalArguments); - f.isMSize = false; - f.instruction = {}; - f.generateCode = std::move(_generateCode); - return f; -} - -class BuiltinFunctionCollection -{ - static size_t constexpr instructionBit = 0; - static size_t constexpr replacedInstructionBit = 1; - static size_t constexpr objectAccessBit = 2; - static size_t constexpr requiresEOFBit = 3; - static size_t constexpr requiresNonEOFBit = 4; -public: - struct BuiltinDescription - { - /// whether the corresponding evm builtin function is an instruction builtin - bool 
instruction() const { return value.test(instructionBit); } - /// whether the corresponding evm builtin has been replaced by another builtin, ie, should be skipped - bool replaced() const { return value.test(replacedInstructionBit); } - /// if true, the evm builtin function is only valid when object access is given - bool requiresObjectAccess() const { return value.test(objectAccessBit); } - /// if true, the evm builtin function is only valid if EOF is enabled - bool requiresEOF() const { return value.test(requiresEOFBit); } - /// if true, the evm builtin function is only valid if EOF is not enabled - bool requiresNonEOF() const { return value.test(requiresNonEOFBit); } - - BuiltinDescription operator|(BuiltinDescription const& _other) const - { - return { value | _other.value }; - } - - std::bitset<5> value; - }; - - static std::vector> const& builtins() - { - static BuiltinFunctionCollection const instance; - return instance.m_builtinFunctions; - } - -private: - static BuiltinDescription constexpr instruction{1 << instructionBit}; - static BuiltinDescription constexpr replaced{1 << replacedInstructionBit}; - static BuiltinDescription constexpr objectAccess{1 << objectAccessBit}; - static BuiltinDescription constexpr requiresEOF{1 << requiresEOFBit}; - static BuiltinDescription constexpr requiresNonEOF{1 << requiresNonEOFBit}; - - BuiltinFunctionCollection() - { - for (auto const& [name, opcode]: evmasm::c_instructions) - { - if (evmasm::SemanticInformation::isSwapInstruction(opcode) || evmasm::SemanticInformation::isDupInstruction(opcode)) - continue; - - if ( - opcode != evmasm::Instruction::DATALOADN && - opcode != evmasm::Instruction::EOFCREATE && - opcode != evmasm::Instruction::RETURNCONTRACT - ) - { - // difficulty was replaced by prevrandao after london - if (opcode == evmasm::Instruction::PREVRANDAO && name == "DIFFICULTY") - m_builtinFunctions.emplace_back(instruction, instructionBuiltin(opcode, langutil::EVMVersion::london())); - else - 
m_builtinFunctions.emplace_back(instruction, instructionBuiltin(opcode, langutil::EVMVersion::current())); - } - else - // these opcodes are replaced by explicit builtin functions - m_builtinFunctions.emplace_back(instruction | replaced, BuiltinFunctionForEVM{}); - } - - m_builtinFunctions.emplace_back(objectAccess, linkersymbolBuiltin()); - m_builtinFunctions.emplace_back(objectAccess, memoryguardBuiltin()); - - m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, datasizeBuiltin()); - m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, dataoffsetBuiltin()); - m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, datacopyBuiltin()); - m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, setimmutableBuiltin()); - m_builtinFunctions.emplace_back(objectAccess | requiresNonEOF, loadimmutableBuiltin()); - - m_builtinFunctions.emplace_back(objectAccess | requiresEOF, auxdataloadnBuiltin()); - m_builtinFunctions.emplace_back(objectAccess | requiresEOF, eofcreateBuiltin()); - m_builtinFunctions.emplace_back(objectAccess | requiresEOF, returncontractBuiltin()); - - yulAssert( - ranges::all_of(m_builtinFunctions, [](std::tuple const& _builtin) { - return std::get<1>(_builtin).name.substr(0, "verbatim_"s.size()) != "verbatim_"; - }), - "Builtin functions besides verbatim should not start with the verbatim_ prefix." 
- ); - } - - static BuiltinFunctionForEVM instructionBuiltin(evmasm::Instruction const& _instruction, langutil::EVMVersion const& _evmVersion) - { - evmasm::InstructionInfo const info = evmasm::instructionInfo(_instruction, _evmVersion); - BuiltinFunctionForEVM f; - f.name = toLower(info.name); - f.numParameters = static_cast(info.args); - f.numReturns = static_cast(info.ret); - f.sideEffects = EVMDialect::sideEffectsOfInstruction(_instruction); - f.controlFlowSideEffects = ControlFlowSideEffects::fromInstruction(_instruction); - f.isMSize = _instruction == evmasm::Instruction::MSIZE; - f.literalArguments.clear(); - f.instruction = _instruction; - f.generateCode = [_instruction]( - FunctionCall const&, - AbstractAssembly& _assembly, - BuiltinContext& - ) - { - _assembly.appendInstruction(_instruction); - }; - return f; - } - - static BuiltinFunctionForEVM linkersymbolBuiltin() - { - return createFunction( - "linkersymbol", - 1, - 1, - SideEffects{}, - ControlFlowSideEffects{}, - {LiteralKind::String}, - [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext&) { - yulAssert(_call.arguments.size() == 1, ""); - Expression const& arg = _call.arguments.front(); - _assembly.appendLinkerSymbol(formatLiteral(std::get(arg))); - } - ); - } - - static BuiltinFunctionForEVM memoryguardBuiltin() - { - return createFunction( - "memoryguard", - 1, - 1, - SideEffects{}, - ControlFlowSideEffects{}, - {LiteralKind::Number}, - [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext&) { - yulAssert(_call.arguments.size() == 1, ""); - Literal const* literal = std::get_if(&_call.arguments.front()); - yulAssert(literal, ""); - _assembly.appendConstant(literal->value.value()); - } - ); - } - - static BuiltinFunctionForEVM datasizeBuiltin() - { - return createFunction( - "datasize", - 1, - 1, - SideEffects{}, - ControlFlowSideEffects{}, - {LiteralKind::String}, - [](FunctionCall const& _call, AbstractAssembly& _assembly, BuiltinContext& _context) { - 
yulAssert(_context.currentObject, "No object available."); - yulAssert(_call.arguments.size() == 1, ""); - Expression const& arg = _call.arguments.front(); - YulName const dataName (formatLiteral(std::get(arg))); - if (_context.currentObject->name == dataName.str()) - _assembly.appendAssemblySize(); - else - { - std::vector subIdPath = - _context.subIDs.count(dataName.str()) == 0 ? - _context.currentObject->pathToSubObject(dataName.str()) : - std::vector{_context.subIDs.at(dataName.str())}; - yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); - _assembly.appendDataSize(subIdPath); - } - } - ); - } - - static BuiltinFunctionForEVM dataoffsetBuiltin() - { - return createFunction("dataoffset", 1, 1, SideEffects{}, ControlFlowSideEffects{}, {LiteralKind::String}, []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& _context - ) { - yulAssert(_context.currentObject, "No object available."); - yulAssert(_call.arguments.size() == 1, ""); - Expression const& arg = _call.arguments.front(); - YulName const dataName (formatLiteral(std::get(arg))); - if (_context.currentObject->name == dataName.str()) - _assembly.appendConstant(0); - else - { - std::vector subIdPath = - _context.subIDs.count(dataName.str()) == 0 ? 
- _context.currentObject->pathToSubObject(dataName.str()) : - std::vector{_context.subIDs.at(dataName.str())}; - yulAssert(!subIdPath.empty(), "Could not find assembly object <" + dataName.str() + ">."); - _assembly.appendDataOffset(subIdPath); - } - }); - } - - static BuiltinFunctionForEVM datacopyBuiltin() - { - return createFunction( - "datacopy", - 3, - 0, - EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::CODECOPY), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::CODECOPY), - {}, - []( - FunctionCall const&, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - _assembly.appendInstruction(evmasm::Instruction::CODECOPY); - } - ); - } - - static BuiltinFunctionForEVM setimmutableBuiltin() - { - return createFunction( - "setimmutable", - 3, - 0, - SideEffects{ - false, // movable - false, // movableApartFromEffects - false, // canBeRemoved - false, // canBeRemovedIfNotMSize - true, // cannotLoop - SideEffects::None, // otherState - SideEffects::None, // storage - SideEffects::Write, // memory - SideEffects::None // transientStorage - }, - ControlFlowSideEffects{}, - {std::nullopt, LiteralKind::String, std::nullopt}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - yulAssert(_call.arguments.size() == 3, ""); - auto const identifier = (formatLiteral(std::get(_call.arguments[1]))); - _assembly.appendImmutableAssignment(identifier); - } - ); - } - - static BuiltinFunctionForEVM loadimmutableBuiltin() - { - return createFunction( - "loadimmutable", - 1, - 1, - SideEffects{}, - ControlFlowSideEffects{}, - {LiteralKind::String}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - yulAssert(_call.arguments.size() == 1, ""); - _assembly.appendImmutable(formatLiteral(std::get(_call.arguments.front()))); - } - ); - } - - static BuiltinFunctionForEVM auxdataloadnBuiltin() - { - return createFunction( - "auxdataloadn", - 1, - 1, - 
EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::DATALOADN), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::DATALOADN), - {LiteralKind::Number}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - yulAssert(_call.arguments.size() == 1); - Literal const* literal = std::get_if(&_call.arguments.front()); - yulAssert(literal, ""); - yulAssert(literal->value.value() <= std::numeric_limits::max()); - _assembly.appendAuxDataLoadN(static_cast(literal->value.value())); - } - ); - } - - static BuiltinFunctionForEVM eofcreateBuiltin() - { - return createFunction( - "eofcreate", - 5, - 1, - EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::EOFCREATE), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::EOFCREATE), - {LiteralKind::String, std::nullopt, std::nullopt, std::nullopt, std::nullopt}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& context - ) { - yulAssert(_call.arguments.size() == 5); - Literal const* literal = std::get_if(&_call.arguments.front()); - auto const formattedLiteral = formatLiteral(*literal); - yulAssert(!util::contains(formattedLiteral, '.')); - auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral); - yulAssert(containerID != nullptr); - yulAssert(*containerID <= std::numeric_limits::max()); - _assembly.appendEOFCreate(static_cast(*containerID)); - } - ); - } - - static BuiltinFunctionForEVM returncontractBuiltin() - { - return createFunction( - "returncontract", - 3, - 0, - EVMDialect::sideEffectsOfInstruction(evmasm::Instruction::RETURNCONTRACT), - ControlFlowSideEffects::fromInstruction(evmasm::Instruction::RETURNCONTRACT), - {LiteralKind::String, std::nullopt, std::nullopt}, - []( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& context - ) { - yulAssert(_call.arguments.size() == 3); - Literal const* literal = std::get_if(&_call.arguments.front()); - yulAssert(literal); - auto 
const formattedLiteral = formatLiteral(*literal); - yulAssert(!util::contains(formattedLiteral, '.')); - auto const* containerID = valueOrNullptr(context.subIDs, formattedLiteral); - yulAssert(containerID != nullptr); - yulAssert(*containerID <= std::numeric_limits::max()); - _assembly.appendReturnContract(static_cast(*containerID)); - } - ); - } - - std::vector> m_builtinFunctions; -}; - -/// Make sure to only add builtins in a way that is consistent over EVM versions. If the order depends on the -/// EVM version - which can easily happen using conditionals -, different dialects' builtin handles -/// become inherently incompatible. -std::vector createBuiltins(langutil::EVMVersion _evmVersion, std::optional _eofVersion, bool _objectAccess) +std::vector createDialectBuiltins( + std::vector> const& _allBuiltins, + langutil::EVMVersion const _evmVersion, + std::optional const _eofVersion, + bool const _objectAccess +) { std::vector builtins; - builtins.reserve(BuiltinFunctionCollection::builtins().size()); + builtins.reserve(_allBuiltins.size()); - for (auto const& [description, builtin]: BuiltinFunctionCollection::builtins()) + for (auto const& [scopes, builtin]: _allBuiltins) { bool builtinShouldBeAdded = true; - if (description.instruction()) + if (scopes.instruction()) { - if (description.replaced()) + if (scopes.replaced()) builtinShouldBeAdded = false; else { @@ -582,9 +210,9 @@ std::vector createBuiltins(langutil::EVMVersion _e } } - builtinShouldBeAdded &= !description.requiresObjectAccess() || _objectAccess; - builtinShouldBeAdded &= !description.requiresEOF() || _eofVersion.has_value(); - builtinShouldBeAdded &= !description.requiresNonEOF() || !_eofVersion.has_value(); + builtinShouldBeAdded &= !scopes.requiresObjectAccess() || _objectAccess; + builtinShouldBeAdded &= !scopes.requiresEOF() || _eofVersion.has_value(); + builtinShouldBeAdded &= !scopes.requiresNonEOF() || !_eofVersion.has_value(); if (builtinShouldBeAdded) builtins.emplace_back(&builtin); @@ 
-603,12 +231,11 @@ std::regex const& verbatimPattern() } - EVMDialect::EVMDialect(langutil::EVMVersion _evmVersion, std::optional _eofVersion, bool _objectAccess): m_objectAccess(_objectAccess), m_evmVersion(_evmVersion), m_eofVersion(_eofVersion), - m_functions(createBuiltins(_evmVersion, _eofVersion, _objectAccess)), + m_functions(createDialectBuiltins(allBuiltins().functions(), _evmVersion, _eofVersion, _objectAccess)), m_reserved(createReservedIdentifiers(_evmVersion, _eofVersion)) { for (auto const& [index, maybeBuiltin]: m_functions | ranges::views::enumerate) @@ -668,13 +295,12 @@ BuiltinFunctionForEVM const& EVMDialect::builtin(BuiltinHandle const& _handle) c return *maybeBuiltin; } - bool EVMDialect::reservedIdentifier(std::string_view _name) const { if (m_objectAccess) if (_name.substr(0, "verbatim"s.size()) == "verbatim") return true; - return m_reserved.count(_name) != 0; + return m_reserved.contains(_name); } EVMDialect const& EVMDialect::strictAssemblyForEVM(langutil::EVMVersion _evmVersion, std::optional _eofVersion) @@ -695,42 +321,14 @@ EVMDialect const& EVMDialect::strictAssemblyForEVMObjects(langutil::EVMVersion _ return *dialects[{_evmVersion, _eofVersion}]; } -SideEffects EVMDialect::sideEffectsOfInstruction(evmasm::Instruction _instruction) +std::set EVMDialect::builtinFunctionNames() const { return ranges::views::keys(m_builtinFunctionsByName) | ranges::to; } BuiltinFunctionForEVM EVMDialect::createVerbatimFunctionFromHandle(BuiltinHandle const& _handle) { - return std::apply(createVerbatimFunction, verbatimIndexToArgsAndRets(_handle.id)); -} - -BuiltinFunctionForEVM EVMDialect::createVerbatimFunction(size_t _arguments, size_t _returnVariables) -{ - BuiltinFunctionForEVM builtinFunction = createFunction( - "verbatim_" + std::to_string(_arguments) + "i_" + std::to_string(_returnVariables) + "o", - 1 + _arguments, - _returnVariables, - SideEffects::worst(), - ControlFlowSideEffects::worst(), // Worst control flow side effects because verbatim 
can do anything. - std::vector>{LiteralKind::String} + std::vector>(_arguments), - [=]( - FunctionCall const& _call, - AbstractAssembly& _assembly, - BuiltinContext& - ) { - yulAssert(_call.arguments.size() == (1 + _arguments), ""); - Expression const& bytecode = _call.arguments.front(); - - _assembly.appendVerbatim( - asBytes(formatLiteral(std::get(bytecode))), - _arguments, - _returnVariables - ); - } - ); - builtinFunction.isMSize = true; - return builtinFunction; + return std::apply(EVMBuiltins::createVerbatimFunction, verbatimIndexToArgsAndRets(_handle.id)); } BuiltinHandle EVMDialect::verbatimFunction(size_t _arguments, size_t _returnVariables) const @@ -745,7 +343,13 @@ BuiltinHandle EVMDialect::verbatimFunction(size_t _arguments, size_t _returnVari auto& verbatimFunctionPtr = m_verbatimFunctions[verbatimIndex]; !verbatimFunctionPtr ) - verbatimFunctionPtr = std::make_unique(createVerbatimFunction(_arguments, _returnVariables)); + verbatimFunctionPtr = std::make_unique(EVMBuiltins::createVerbatimFunction(_arguments, _returnVariables)); return {verbatimIndex}; } + +EVMBuiltins const& EVMDialect::allBuiltins() +{ + static EVMBuiltins const builtins; + return builtins; +} diff --git a/libyul/backends/evm/EVMDialect.h b/libyul/backends/evm/EVMDialect.h index 73136ef6e30b..96723e9191f7 100644 --- a/libyul/backends/evm/EVMDialect.h +++ b/libyul/backends/evm/EVMDialect.h @@ -25,11 +25,15 @@ #include #include -#include +#include + #include -#include +#include +#include #include +#include +#include namespace solidity::yul { @@ -37,29 +41,6 @@ namespace solidity::yul struct FunctionCall; class Object; -/** - * Context used during code generation. - */ -struct BuiltinContext -{ - Object const* currentObject = nullptr; - /// Mapping from named objects to abstract assembly sub IDs. 
- std::map subIDs; - - std::map functionIDs; -}; - -struct BuiltinFunctionForEVM: public BuiltinFunction -{ - std::optional instruction; - /// Function to generate code for the given function call and append it to the abstract - /// assembly. Expects all non-literal arguments of the call to be on stack in reverse order - /// (i.e. right-most argument pushed first). - /// Expects the caller to set the source location. - std::function generateCode; -}; - - /** * Yul dialect for EVM as a backend. * The main difference is that the builtin functions take an AbstractAssembly for the @@ -108,19 +89,18 @@ class EVMDialect: public Dialect bool providesObjectAccess() const { return m_objectAccess; } - static SideEffects sideEffectsOfInstruction(evmasm::Instruction _instruction); - static size_t constexpr verbatimMaxInputSlots = 100; static size_t constexpr verbatimMaxOutputSlots = 100; protected: static bool constexpr isVerbatimHandle(BuiltinHandle const& _handle) { return _handle.id < verbatimIDOffset; } static BuiltinFunctionForEVM createVerbatimFunctionFromHandle(BuiltinHandle const& _handle); - static BuiltinFunctionForEVM createVerbatimFunction(size_t _arguments, size_t _returnVariables); BuiltinHandle verbatimFunction(size_t _arguments, size_t _returnVariables) const; static size_t constexpr verbatimIDOffset = verbatimMaxInputSlots * verbatimMaxOutputSlots; + static EVMBuiltins const& allBuiltins(); + bool const m_objectAccess; langutil::EVMVersion const m_evmVersion; std::optional m_eofVersion; diff --git a/libyul/backends/evm/NoOutputAssembly.cpp b/libyul/backends/evm/NoOutputAssembly.cpp index b5b168b14ea8..2c1a325150cf 100644 --- a/libyul/backends/evm/NoOutputAssembly.cpp +++ b/libyul/backends/evm/NoOutputAssembly.cpp @@ -26,6 +26,7 @@ #include +#include #include using namespace solidity; @@ -227,19 +228,10 @@ NoOutputEVMDialect::NoOutputEVMDialect(EVMDialect const& _copyFrom): std::vector result; result.reserve(m_functions.size()); - for (auto const* 
builtinFunction: m_functions) + for (auto const& [index, builtinFunction]: m_functions | ranges::views::enumerate) { if (builtinFunction) - { - auto it = noOutputBuiltins.find(builtinFunction); - if (it == noOutputBuiltins.end()) - { - auto noOutputFunction = *builtinFunction; - modifyBuiltinToNoOutput(noOutputFunction); - it = noOutputBuiltins.emplace(builtinFunction, std::move(noOutputFunction)).first; - } - result.emplace_back(&it->second); - } + result.emplace_back(&noOutputBuiltins[index]); else result.emplace_back(nullptr); } From 81bf3f4bdde4be582baf1995073227071960cad2 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Mon, 28 Apr 2025 14:23:42 +0200 Subject: [PATCH 4/6] Add allEOFVersions to EVMVersion --- liblangutil/EVMVersion.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/liblangutil/EVMVersion.h b/liblangutil/EVMVersion.h index cec638ed38d3..3d200731c9aa 100644 --- a/liblangutil/EVMVersion.h +++ b/liblangutil/EVMVersion.h @@ -23,6 +23,7 @@ #include +#include #include #include #include @@ -83,6 +84,14 @@ class EVMVersion }; } + static auto constexpr allEOFVersions() + { + return std::array{ + std::optional(), + std::make_optional(1) + }; + } + static std::optional fromString(std::string const& _version) { for (auto const& v: allVersions()) From 87acbd69d6190dfef4e657f10ea3961cf2c9465a Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Mon, 28 Apr 2025 14:24:04 +0200 Subject: [PATCH 5/6] EVMVersion::allVersions is constexpr --- liblangutil/EVMVersion.h | 37 ++++++++++++++++++------------------ solc/CommandLineParser.cpp | 2 +- test/tools/fuzzer_common.cpp | 2 +- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/liblangutil/EVMVersion.h b/liblangutil/EVMVersion.h index 3d200731c9aa..79c08d34355a 100644 --- a/liblangutil/EVMVersion.h +++ b/liblangutil/EVMVersion.h @@ -27,7 +27,6 @@ #include #include #include -#include namespace solidity::evmasm @@ -50,23 
+49,23 @@ class EVMVersion static EVMVersion current() { return {currentVersion}; } - static EVMVersion homestead() { return {Version::Homestead}; } - static EVMVersion tangerineWhistle() { return {Version::TangerineWhistle}; } - static EVMVersion spuriousDragon() { return {Version::SpuriousDragon}; } - static EVMVersion byzantium() { return {Version::Byzantium}; } - static EVMVersion constantinople() { return {Version::Constantinople}; } - static EVMVersion petersburg() { return {Version::Petersburg}; } - static EVMVersion istanbul() { return {Version::Istanbul}; } - static EVMVersion berlin() { return {Version::Berlin}; } - static EVMVersion london() { return {Version::London}; } - static EVMVersion paris() { return {Version::Paris}; } - static EVMVersion shanghai() { return {Version::Shanghai}; } - static EVMVersion cancun() { return {Version::Cancun}; } - static EVMVersion prague() { return {Version::Prague}; } - static EVMVersion osaka() { return {Version::Osaka}; } - - static std::vector allVersions() { - return { + static EVMVersion constexpr homestead() { return {Version::Homestead}; } + static EVMVersion constexpr tangerineWhistle() { return {Version::TangerineWhistle}; } + static EVMVersion constexpr spuriousDragon() { return {Version::SpuriousDragon}; } + static EVMVersion constexpr byzantium() { return {Version::Byzantium}; } + static EVMVersion constexpr constantinople() { return {Version::Constantinople}; } + static EVMVersion constexpr petersburg() { return {Version::Petersburg}; } + static EVMVersion constexpr istanbul() { return {Version::Istanbul}; } + static EVMVersion constexpr berlin() { return {Version::Berlin}; } + static EVMVersion constexpr london() { return {Version::London}; } + static EVMVersion constexpr paris() { return {Version::Paris}; } + static EVMVersion constexpr shanghai() { return {Version::Shanghai}; } + static EVMVersion constexpr cancun() { return {Version::Cancun}; } + static EVMVersion constexpr prague() { return 
{Version::Prague}; } + static EVMVersion constexpr osaka() { return {Version::Osaka}; } + + static auto constexpr allVersions() { + return std::array{ homestead(), tangerineWhistle(), spuriousDragon(), @@ -172,7 +171,7 @@ class EVMVersion }; static auto constexpr currentVersion = Version::Cancun; - EVMVersion(Version _version): m_version(_version) {} + constexpr EVMVersion(Version _version): m_version(_version) {} Version m_version = currentVersion; }; diff --git a/solc/CommandLineParser.cpp b/solc/CommandLineParser.cpp index a382c5217377..50ce02bd14c8 100644 --- a/solc/CommandLineParser.cpp +++ b/solc/CommandLineParser.cpp @@ -596,7 +596,7 @@ General Information)").c_str(), auto const annotateEVMVersion = [](EVMVersion const& _version) { return _version.name() + (_version.isExperimental() ? " (experimental)" : ""); }; - std::vector allEVMVersions = EVMVersion::allVersions(); + static auto constexpr allEVMVersions = EVMVersion::allVersions(); std::string annotatedEVMVersions = util::joinHumanReadable( allEVMVersions | ranges::views::transform(annotateEVMVersion), ", ", diff --git a/test/tools/fuzzer_common.cpp b/test/tools/fuzzer_common.cpp index 6a70c1cc8f77..8cc30c199507 100644 --- a/test/tools/fuzzer_common.cpp +++ b/test/tools/fuzzer_common.cpp @@ -39,7 +39,7 @@ using namespace solidity::frontend; using namespace solidity::langutil; using namespace solidity::util; -static std::vector s_evmVersions = EVMVersion::allVersions(); +static auto constexpr s_evmVersions = EVMVersion::allVersions(); void FuzzerUtil::testCompilerJsonInterface(std::string const& _input, bool _optimize, bool _quiet) { From 8a7d6194244e9ac7d4f1c4a911d08f88a45f9309 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Thu, 20 Mar 2025 11:31:46 +0100 Subject: [PATCH 6/6] EVMDialect: add test that enforces builtin handle compatibility Between current default dialect as well as latest dialect and all other dialects. 
--- libyul/backends/evm/EVMDialect.h | 8 ++ test/CMakeLists.txt | 1 + test/libyul/EVMDialectCompatibility.cpp | 167 ++++++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 test/libyul/EVMDialectCompatibility.cpp diff --git a/libyul/backends/evm/EVMDialect.h b/libyul/backends/evm/EVMDialect.h index 96723e9191f7..2f43d62952c6 100644 --- a/libyul/backends/evm/EVMDialect.h +++ b/libyul/backends/evm/EVMDialect.h @@ -45,6 +45,10 @@ class Object; * Yul dialect for EVM as a backend. * The main difference is that the builtin functions take an AbstractAssembly for the * code generation. + * + * Builtins are defined so that their handles stay compatible across dialect flavors - be it with/without + * object access, with/without EOF, or different EVM versions. A builtin may, of course, not be defined in every flavor. + * A builtin that _is_ defined in two flavors, though, has the same handle in both. */ class EVMDialect: public Dialect { @@ -81,6 +85,8 @@ class EVMDialect: public Dialect AuxiliaryBuiltinHandles const& auxiliaryBuiltinHandles() const { return m_auxiliaryBuiltinHandles; } static EVMDialect const& strictAssemblyForEVM(langutil::EVMVersion _evmVersion, std::optional _eofVersion); + /// Builtins with and without object access are compatible, i.e., builtin handles obtained from the dialect without + /// object access remain valid in the dialect with object access.
static EVMDialect const& strictAssemblyForEVMObjects(langutil::EVMVersion _evmVersion, std::optional _eofVersion); langutil::EVMVersion evmVersion() const { return m_evmVersion; } @@ -92,6 +98,8 @@ class EVMDialect: public Dialect static size_t constexpr verbatimMaxInputSlots = 100; static size_t constexpr verbatimMaxOutputSlots = 100; + std::set builtinFunctionNames() const; + protected: static bool constexpr isVerbatimHandle(BuiltinHandle const& _handle) { return _handle.id < verbatimIDOffset; } static BuiltinFunctionForEVM createVerbatimFunctionFromHandle(BuiltinHandle const& _handle); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c66963c190d4..b0cd991f7313 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -149,6 +149,7 @@ set(libyul_sources libyul/ControlFlowSideEffectsTest.h libyul/EVMCodeTransformTest.cpp libyul/EVMCodeTransformTest.h + libyul/EVMDialectCompatibility.cpp libyul/FunctionSideEffects.cpp libyul/FunctionSideEffects.h libyul/Inliner.cpp diff --git a/test/libyul/EVMDialectCompatibility.cpp b/test/libyul/EVMDialectCompatibility.cpp new file mode 100644 index 000000000000..6c8c5a60a1df --- /dev/null +++ b/test/libyul/EVMDialectCompatibility.cpp @@ -0,0 +1,167 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . 
+*/ +// SPDX-License-Identifier: GPL-3.0 + +#include + +#include + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +namespace bdata = boost::unit_test::data; + +using namespace solidity; +using namespace solidity::yul; + +namespace +{ + +struct EVMDialectConfigurationToTest +{ + EVMDialect const& dialect() const + { + return objectAccess ? EVMDialect::strictAssemblyForEVMObjects(evmVersion, eofVersion) : EVMDialect::strictAssemblyForEVM(evmVersion, eofVersion); + } + + friend std::ostream& operator<<(std::ostream& _out, EVMDialectConfigurationToTest const& _config) + { + _out << fmt::format( + "EVMConfigurationToTest[{}, eof={}, objectAccess={}]", + _config.evmVersion.name(), + _config.eofVersion.has_value() ? std::to_string(*_config.eofVersion) : "null", + _config.objectAccess + ); + return _out; + } + + langutil::EVMVersion evmVersion; + std::optional eofVersion; + bool objectAccess; +}; + +template +std::vector generateConfigs(EVMVersionCollection const& _evmVersions, std::vector const& _objectAccess = {false, true}) +{ + std::vector configs; + for (bool const objectAccess: _objectAccess) + for (auto const& eofVersion: langutil::EVMVersion::allEOFVersions()) + for (auto const& evmVersion: _evmVersions) + if (!eofVersion || evmVersion.supportsEOF()) + configs.push_back(EVMDialectConfigurationToTest{evmVersion, eofVersion, objectAccess}); + + return configs; +} +} + +BOOST_AUTO_TEST_SUITE(EVMDialectCompatibility) + +/// For each source dialect (current and latest EVM version), check against every other (target) dialect that each +/// builtin of the source dialect that also exists in the target dialect has the same handle in both. +/// Note: The comparison is packed into a single BOOST_REQUIRE to avoid massive amounts of output on cout.
+BOOST_DATA_TEST_CASE( + builtin_function_handle_compatibility, + bdata::monomorphic::grid( + bdata::make(generateConfigs(std::array{langutil::EVMVersion::current(), langutil::EVMVersion::allVersions().back()})), + bdata::make(generateConfigs(langutil::EVMVersion::allVersions())) + ), + sourceDialectConfiguration, + evmDialectConfigurationToTest +) +{ + auto const& sourceDialect = sourceDialectConfiguration.dialect(); + auto const& dialectToTestAgainst = evmDialectConfigurationToTest.dialect(); + + std::set const builtinNames = sourceDialect.builtinFunctionNames(); + std::vector sourceHandles; + sourceHandles.reserve(builtinNames.size()); + std::vector> testHandles; + testHandles.reserve(builtinNames.size()); + + for (auto const& builtinFunctionName: builtinNames) + { + std::optional sourceHandle = sourceDialect.findBuiltin(builtinFunctionName); + soltestAssert(sourceHandle.has_value()); + sourceHandles.push_back(*sourceHandle); + testHandles.push_back(dialectToTestAgainst.findBuiltin(builtinFunctionName)); + } + + BOOST_REQUIRE([&]() -> boost::test_tools::predicate_result + { + boost::test_tools::predicate_result result{true}; + for (auto const& [name, sourceBuiltin, testBuiltin]: ranges::views::zip(builtinNames, sourceHandles, testHandles)) + if (testBuiltin && sourceBuiltin != *testBuiltin) + { + result = false; + result.message() << fmt::format("Builtin \"{}\" had a mismatch of builtin handles: {} =/= {}.", name, sourceBuiltin.id, testBuiltin->id); + } + return result; + }()); +} + +/// Test that for all inline-dialects the corresponding object dialect contains all inline-dialect builtins and they +/// have the same handle. 
+BOOST_DATA_TEST_CASE( + builtin_inline_to_object_compatibility, + bdata::make(generateConfigs(langutil::EVMVersion::allVersions(), {false})), + configToTest +) +{ + auto const& dialect = EVMDialect::strictAssemblyForEVM(configToTest.evmVersion, configToTest.eofVersion); + auto const& dialectForObjects = EVMDialect::strictAssemblyForEVMObjects(configToTest.evmVersion, configToTest.eofVersion); + + std::set const inlineBuiltinNames = dialect.builtinFunctionNames(); + + std::vector inlineHandles; + inlineHandles.reserve(inlineBuiltinNames.size()); + std::vector> objectHandles; + objectHandles.reserve(inlineBuiltinNames.size()); + + for (auto const& builtinFunctionName: inlineBuiltinNames) + { + std::optional handle = dialect.findBuiltin(builtinFunctionName); + soltestAssert(handle.has_value()); + inlineHandles.push_back(*handle); + objectHandles.push_back(dialectForObjects.findBuiltin(builtinFunctionName)); + } + + BOOST_REQUIRE([&]() -> boost::test_tools::predicate_result + { + boost::test_tools::predicate_result result{true}; + for (auto const& [name, inlineHandle, objectHandle]: ranges::views::zip(inlineBuiltinNames, inlineHandles, objectHandles)) + if (!objectHandle || inlineHandle != *objectHandle) + { + result = false; + result.message() + << fmt::format("Builtin \"{}\" had a mismatch of builtin handles: {} != ", name, inlineHandle.id) + << (objectHandle.has_value() ? std::to_string(objectHandle->id) : "null"); + } + return result; + }()); +} + +BOOST_AUTO_TEST_SUITE_END()