From 254af0e0b5f42e3a7bd0cd9352550524e196ebf6 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 9 Apr 2025 12:18:53 +0200 Subject: [PATCH 1/9] Add instruction location info to eof assembly --- libevmasm/Assembly.cpp | 64 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index a0711cd41756..8f181345b237 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -53,6 +53,54 @@ using namespace solidity::evmasm; using namespace solidity::langutil; using namespace solidity::util; +namespace +{ + +/// Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode, +/// this class can be instantiated in that scope. It will record the current bytecode size (before addition) +/// and, at destruction time, record the new bytecode size. This information is then added to an external +/// instruction locations vector. +/// If the instruction decomposes into multiple individual evm instructions, `emit` can be +/// called for all but the last one (which will be emitted by the destructor). 
+class InstructionLocationEmitter +{ +public: + InstructionLocationEmitter( + std::vector& _instructionLocations, + bytes const& _bytecode, + size_t const _assemblyItemIndex + ): + m_instructionLocations(_instructionLocations), + m_bytecode(_bytecode), + m_assemblyItemIndex(_assemblyItemIndex), + m_instructionLocationStart(_bytecode.size()) + {} + + ~InstructionLocationEmitter() + { + emit(); + } + + void emit() + { + auto const end = m_bytecode.size(); + m_instructionLocations.emplace_back(LinkerObject::InstructionLocation{ + .start = m_instructionLocationStart, + .end = end, + .assemblyItemIndex = m_assemblyItemIndex + }); + m_instructionLocationStart = end; + } + +private: + std::vector& m_instructionLocations; + bytes const& m_bytecode; + size_t const m_assemblyItemIndex; + size_t m_instructionLocationStart; +}; + +} + std::map> Assembly::s_sharedSourceNames; AssemblyItem const& Assembly::append(AssemblyItem _i) @@ -1607,9 +1655,17 @@ LinkerObject const& Assembly::assembleEOF() const for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) { auto const sectionStart = ret.bytecode.size(); + + std::vector instructionLocations; + instructionLocations.reserve(codeSection.items.size()); + solAssert(!codeSection.items.empty(), "Empty code section."); - for (AssemblyItem const& item: codeSection.items) + + for (auto const& [assemblyItemIndex, item]: codeSection.items | ranges::views::enumerate) { + // collect instruction locations via side effects + InstructionLocationEmitter instructionLocationEmitter {instructionLocations, ret.bytecode, assemblyItemIndex}; + // store position of the invalid jump destination if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits::max()) m_tagPositionsInBytecode[0] = ret.bytecode.size(); @@ -1725,6 +1781,12 @@ LinkerObject const& Assembly::assembleEOF() const "Code section too large for EOF." 
); setBigEndianUint16(ret.bytecode, codeSectionSizePositions[codeSectionIndex], ret.bytecode.size() - sectionStart); + + ret.codeSectionLocations.emplace_back(LinkerObject::CodeSectionLocation{ + .start = sectionStart, + .end = ret.bytecode.size(), + .instructionLocations = std::move(instructionLocations) + }); } for (auto const& [refPos, tagId]: tagRef) From 7572d36bd6e8ec0823f7b488181c85a73c869d60 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:54:11 +0200 Subject: [PATCH 2/9] Refactor legacy assemble to also use RAII instruction location construction --- libevmasm/Assembly.cpp | 158 +++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 101 deletions(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 8f181345b237..f498a50bd07e 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -1330,22 +1330,12 @@ LinkerObject const& Assembly::assembleLegacy() const uint8_t dataRefPush = static_cast(pushInstruction(bytesPerDataRef)); LinkerObject::CodeSectionLocation codeSectionLocation; + codeSectionLocation.instructionLocations.reserve(items.size()); codeSectionLocation.start = 0; - size_t assemblyItemIndex = 0; - auto assembleInstruction = [&](auto&& _addInstruction) { - size_t start = ret.bytecode.size(); - _addInstruction(); - size_t end = ret.bytecode.size(); - codeSectionLocation.instructionLocations.emplace_back( - LinkerObject::InstructionLocation{ - .start = start, - .end = end, - .assemblyItemIndex = assemblyItemIndex - } - ); - }; - for (AssemblyItem const& item: items) + for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate) { + // collect instruction locations via side effects + InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex); // store position of the invalid jump destination if (item.type() != Tag && m_tagPositionsInBytecode[0] == 
std::numeric_limits::max()) m_tagPositionsInBytecode[0] = ret.bytecode.size(); @@ -1353,140 +1343,106 @@ LinkerObject const& Assembly::assembleLegacy() const switch (item.type()) { case Operation: - assembleInstruction([&](){ - ret.bytecode += assembleOperation(item); - }); + ret.bytecode += assembleOperation(item); break; case Push: - assembleInstruction([&](){ - ret.bytecode += assemblePush(item); - }); + ret.bytecode += assemblePush(item); break; case PushTag: - { - assembleInstruction([&](){ - ret.bytecode.push_back(tagPush); - tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); - ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); - }); + ret.bytecode.push_back(tagPush); + tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); + ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); break; - } case PushData: - assembleInstruction([&]() { - ret.bytecode.push_back(dataRefPush); - dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - }); + ret.bytecode.push_back(dataRefPush); + dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; case PushSub: - assembleInstruction([&]() { - assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); - ret.bytecode.push_back(dataRefPush); - subRefs.insert(std::make_pair(static_cast(item.data()), ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - }); + assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); + ret.bytecode.push_back(dataRefPush); + subRefs.insert(std::make_pair(static_cast(item.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; case PushSubSize: { - assembleInstruction([&](){ - assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); - auto s = 
subAssemblyById(static_cast(item.data()))->assemble().bytecode.size(); - item.setPushedValue(u256(s)); - unsigned b = std::max(1, numberEncodingSize(s)); - ret.bytecode.push_back(static_cast(pushInstruction(b))); - ret.bytecode.resize(ret.bytecode.size() + b); - bytesRef byr(&ret.bytecode.back() + 1 - b, b); - toBigEndian(s, byr); - }); + assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); + auto s = subAssemblyById(static_cast(item.data()))->assemble().bytecode.size(); + item.setPushedValue(u256(s)); + unsigned b = std::max(1, numberEncodingSize(s)); + ret.bytecode.push_back(static_cast(pushInstruction(b))); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(s, byr); break; } case PushProgramSize: - { - assembleInstruction([&](){ - ret.bytecode.push_back(dataRefPush); - sizeRefs.push_back(static_cast(ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - }); + ret.bytecode.push_back(dataRefPush); + sizeRefs.push_back(static_cast(ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; - } case PushLibraryAddress: { - assembleInstruction([&]() { - auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); - ret.bytecode += bytecode; - ret.linkReferences.insert(linkRef); - }); + auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); + ret.bytecode += bytecode; + ret.linkReferences.insert(linkRef); break; } case PushImmutable: - assembleInstruction([&]() { - ret.bytecode.push_back(static_cast(Instruction::PUSH32)); - // Maps keccak back to the "identifier" std::string of that immutable. - ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); - // Record the bytecode offset of the PUSH32 argument. - ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); - // Advance bytecode by 32 bytes (default initialized). 
- ret.bytecode.resize(ret.bytecode.size() + 32); - }); + ret.bytecode.push_back(static_cast(Instruction::PUSH32)); + // Maps keccak back to the "identifier" std::string of that immutable. + ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); + // Record the bytecode offset of the PUSH32 argument. + ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); + // Advance bytecode by 32 bytes (default initialized). + ret.bytecode.resize(ret.bytecode.size() + 32); break; case VerbatimBytecode: ret.bytecode += assembleVerbatimBytecode(item); break; case AssignImmutable: { + // this decomposes into multiple evm instructions, so we manually call emit on `instructionLocationEmitter` // Expect 2 elements on stack (source, dest_base) auto const& offsets = immutableReferencesBySub[item.data()].second; for (size_t i = 0; i < offsets.size(); ++i) { if (i != offsets.size() - 1) { - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::DUP2)); - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::DUP2)); - }); + ret.bytecode.push_back(static_cast(Instruction::DUP2)); + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::DUP2)); + instructionLocationEmitter.emit(); } - assembleInstruction([&]() { - // TODO: should we make use of the constant optimizer methods for pushing the offsets? - bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); - ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); - ret.bytecode += offsetBytes; - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::ADD)); - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); - }); + // TODO: should we make use of the constant optimizer methods for pushing the offsets? 
+ bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); + ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); + ret.bytecode += offsetBytes; + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::ADD)); + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::MSTORE)); + // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter } if (offsets.empty()) { - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::POP)); - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::POP)); - }); + ret.bytecode.push_back(static_cast(Instruction::POP)); + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::POP)); + // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter } immutableReferencesBySub.erase(item.data()); break; } case PushDeployTimeAddress: - assembleInstruction([&]() { - ret.bytecode += assemblePushDeployTimeAddress(); - }); + ret.bytecode += assemblePushDeployTimeAddress(); break; case Tag: - assembleInstruction([&](){ - ret.bytecode += assembleTag(item, ret.bytecode.size(), true); - }); + ret.bytecode += assembleTag(item, ret.bytecode.size(), true); break; default: solAssert(false, "Unexpected opcode while assembling."); } - - ++assemblyItemIndex; } codeSectionLocation.end = ret.bytecode.size(); From 4e637dbf4fc6491662993181e77d51f88c037a67 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Sat, 12 Apr 2025 07:30:38 +0200 Subject: [PATCH 3/9] Ethdebug requires assembly instance --- libevmasm/Ethdebug.cpp | 2 +- libevmasm/Ethdebug.h | 2 +- libsolidity/interface/CompilerStack.cpp | 3 ++- libyul/YulStack.cpp | 4 ++-- test/libevmasm/Assembler.cpp | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 6e635ef57952..0251600066ce 
100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -77,7 +77,7 @@ Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerO } // anonymous namespace -Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject) +Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject) { Json result = Json::object(); result["contract"] = Json::object(); diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h index 72ac16037969..fdb693b190c4 100644 --- a/libevmasm/Ethdebug.h +++ b/libevmasm/Ethdebug.h @@ -27,7 +27,7 @@ namespace solidity::evmasm::ethdebug { // returns ethdebug/format/program. -Json program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject); +Json program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject); // returns ethdebug/format/info/resources Json resources(std::vector const& _sources, std::string const& _version); diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 1b0a299f7291..5c49f6e17940 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -1216,8 +1216,9 @@ Json CompilerStack::ethdebug(Contract const& _contract, bool _runtime) const solUnimplementedAssert(!isExperimentalSolidity()); evmasm::LinkerObject const& object = _runtime ? _contract.runtimeObject : _contract.object; std::shared_ptr const& assembly = _runtime ? 
_contract.evmRuntimeAssembly : _contract.evmAssembly; + solAssert(assembly); solAssert(sourceIndices().contains(_contract.contract->sourceUnitName())); - return evmasm::ethdebug::program(_contract.contract->name(), sourceIndices()[_contract.contract->sourceUnitName()], assembly.get(), object); + return evmasm::ethdebug::program(_contract.contract->name(), sourceIndices()[_contract.contract->sourceUnitName()], *assembly, object); } bytes CompilerStack::cborMetadata(std::string const& _contractName, bool _forIR) const diff --git a/libyul/YulStack.cpp b/libyul/YulStack.cpp index 5adb8d6d5c9a..525269575b6a 100644 --- a/libyul/YulStack.cpp +++ b/libyul/YulStack.cpp @@ -275,14 +275,14 @@ YulStack::assembleWithDeployed(std::optional _deployName) ); } if (debugInfoSelection().ethdebug) - creationObject.ethdebug = evmasm::ethdebug::program(creationObject.assembly->name(), 0, creationObject.assembly.get(), *creationObject.bytecode.get()); + creationObject.ethdebug = evmasm::ethdebug::program(creationObject.assembly->name(), 0, *creationObject.assembly, *creationObject.bytecode); if (deployedAssembly) { deployedObject.bytecode = std::make_shared(deployedAssembly->assemble()); deployedObject.assembly = deployedAssembly; if (debugInfoSelection().ethdebug) - deployedObject.ethdebug = evmasm::ethdebug::program(deployedObject.assembly->name(), 0, deployedObject.assembly.get(), *deployedObject.bytecode.get()); + deployedObject.ethdebug = evmasm::ethdebug::program(deployedObject.assembly->name(), 0, *deployedObject.assembly, *deployedObject.bytecode); solAssert(deployedAssembly->codeSections().size() == 1); deployedObject.sourceMappings = std::make_unique( evmasm::AssemblyItem::computeSourceMapping( diff --git a/test/libevmasm/Assembler.cpp b/test/libevmasm/Assembler.cpp index 6a41f7a7394c..32855cfbc211 100644 --- a/test/libevmasm/Assembler.cpp +++ b/test/libevmasm/Assembler.cpp @@ -434,7 +434,7 @@ BOOST_AUTO_TEST_CASE(ethdebug_program_last_instruction_with_immediate_arguments) 
assembly.append(AssemblyItem{0x11223344}); LinkerObject output = assembly.assemble(); - Json const program = ethdebug::program("", 0, &assembly, output); + Json const program = ethdebug::program("", 0, assembly, output); BOOST_REQUIRE(program["instructions"].size() == 1); BOOST_REQUIRE(program["instructions"][0]["operation"]["mnemonic"] == "PUSH4"); BOOST_REQUIRE(program["instructions"][0]["operation"]["arguments"][0] == "0x11223344"); @@ -445,7 +445,7 @@ BOOST_AUTO_TEST_CASE(ethdebug_program_last_instruction_with_immediate_arguments) assembly.append(AssemblyItem{0x1122334455}); LinkerObject output = assembly.assemble(); - Json const program = ethdebug::program("", 0, &assembly, output); + Json const program = ethdebug::program("", 0, assembly, output); BOOST_REQUIRE(program["instructions"].size() == 2); BOOST_REQUIRE(program["instructions"][0]["operation"]["mnemonic"] == "PUSH0"); BOOST_REQUIRE(!program["instructions"][0]["operation"].contains("arguments")); From 2b6d713308afd6fbe1c28e067f260083e002bdfe Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Tue, 8 Apr 2025 16:41:37 +0200 Subject: [PATCH 4/9] Ethdebug instructions output over multiple code sections --- Changelog.md | 1 + libevmasm/Ethdebug.cpp | 85 ++++++++++++++++++++++++++++-------------- 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/Changelog.md b/Changelog.md index e8ccbb525302..fe5be8fb7190 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,7 @@ Language Features: Compiler Features: +* Ethdebug: Experimental support for instructions and source locations under EOF. * NatSpec: Capture Natspec documentation of `enum` values in the AST. 
diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 0251600066ce..0a4c56ddd0a0 100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -18,6 +18,8 @@ #include +#include + using namespace solidity; using namespace solidity::evmasm; using namespace solidity::evmasm::ethdebug; @@ -25,25 +27,35 @@ using namespace solidity::evmasm::ethdebug; namespace { -Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) +std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId, size_t const _codeSectionIndex) { - solUnimplementedAssert(_assembly.eofVersion() == std::nullopt, "ethdebug does not yet support EOF."); - solUnimplementedAssert(_assembly.codeSections().size() == 1, "ethdebug does not yet support multiple code-sections."); - for (auto const& instruction: _assembly.codeSections()[0].items) - solUnimplementedAssert(instruction.type() != VerbatimBytecode, "Verbatim bytecode is currently not supported by ethdebug."); - - solAssert(_linkerObject.codeSectionLocations.size() == 1); - solAssert(_linkerObject.codeSectionLocations[0].end <= _linkerObject.bytecode.size()); - Json instructions = Json::array(); - for (size_t i = 0; i < _linkerObject.codeSectionLocations[0].instructionLocations.size(); ++i) + solAssert(_codeSectionIndex < _linkerObject.codeSectionLocations.size()); + solAssert(_codeSectionIndex < _assembly.codeSections().size()); + auto const& locations = _linkerObject.codeSectionLocations[_codeSectionIndex]; + auto const& codeSection = _assembly.codeSections().at(_codeSectionIndex); + + std::vector instructions; + instructions.reserve(codeSection.items.size()); + + bool const codeSectionContainsVerbatim = ranges::any_of( + codeSection.items, + [](auto const& _instruction) { return _instruction.type() == VerbatimBytecode; } + ); + solUnimplementedAssert(!codeSectionContainsVerbatim, "Verbatim bytecode is currently not supported by 
ethdebug."); + + for (auto const& currentInstruction: locations.instructionLocations) { - LinkerObject::InstructionLocation currentInstruction = _linkerObject.codeSectionLocations[0].instructionLocations[i]; - size_t start = currentInstruction.start; - size_t end = currentInstruction.end; - size_t assemblyItemIndex = currentInstruction.assemblyItemIndex; + size_t const start = currentInstruction.start; + size_t const end = currentInstruction.end; + + // some instructions do not contribute to the bytecode + if (start == end) + continue; + + size_t const assemblyItemIndex = currentInstruction.assemblyItemIndex; solAssert(end <= _linkerObject.bytecode.size()); solAssert(start < end); - solAssert(assemblyItemIndex < _assembly.codeSections().at(0).items.size()); + solAssert(assemblyItemIndex < codeSection.items.size()); Json operation = Json::object(); operation["mnemonic"] = instructionInfo(static_cast(_linkerObject.bytecode[start]), _assembly.evmVersion()).name; static size_t constexpr instructionSize = 1; @@ -56,25 +68,40 @@ Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerO solAssert(!argumentData.empty()); operation["arguments"] = Json::array({util::toHex(argumentData, util::HexPrefix::Add)}); } - langutil::SourceLocation const& location = _assembly.codeSections().at(0).items.at(assemblyItemIndex).location(); - Json instruction = Json::object(); - instruction["offset"] = start; - instruction["operation"] = operation; - - instruction["context"] = Json::object(); - instruction["context"]["code"] = Json::object(); - instruction["context"]["code"]["source"] = Json::object(); - instruction["context"]["code"]["source"]["id"] = static_cast(_sourceId); - - instruction["context"]["code"]["range"] = Json::object(); - instruction["context"]["code"]["range"]["offset"] = location.start; - instruction["context"]["code"]["range"]["length"] = location.end - location.start; - instructions.emplace_back(instruction); + langutil::SourceLocation const& 
location = codeSection.items.at(assemblyItemIndex).location(); + instructions.emplace_back(Json{ + { "offset", start }, + {"operation", operation }, + { + "context", { + "code", { + "source", { + { "id", static_cast(_sourceId) }, + }, + "range", { + { "offset", location.start }, + { "length", location.end - location.start } + } + } + } + } + }); } return instructions; } +Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) +{ + auto const numCodeSections = _assembly.codeSections().size(); + solAssert(numCodeSections == _linkerObject.codeSectionLocations.size()); + + std::vector instructionInfo; + for (size_t codeSectionIndex = 0; codeSectionIndex < numCodeSections; ++codeSectionIndex) + instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceId, codeSectionIndex); + return instructionInfo; +} + } // anonymous namespace Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject) From 07e29d258c048af8ba7bbda7a6321ee030b7ba0a Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Fri, 11 Apr 2025 10:48:54 +0200 Subject: [PATCH 5/9] Add ethdebug schema --- libevmasm/CMakeLists.txt | 2 + libevmasm/EthdebugSchema.cpp | 143 +++++++++++++++++++++++++++++ libevmasm/EthdebugSchema.h | 172 +++++++++++++++++++++++++++++++++++ 3 files changed, 317 insertions(+) create mode 100644 libevmasm/EthdebugSchema.cpp create mode 100644 libevmasm/EthdebugSchema.h diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt index a7441b85e264..74aaeb6c293b 100644 --- a/libevmasm/CMakeLists.txt +++ b/libevmasm/CMakeLists.txt @@ -6,6 +6,8 @@ set(sources AssemblyItem.h Ethdebug.cpp Ethdebug.h + EthdebugSchema.cpp + EthdebugSchema.h EVMAssemblyStack.cpp EVMAssemblyStack.h BlockDeduplicator.cpp diff --git a/libevmasm/EthdebugSchema.cpp b/libevmasm/EthdebugSchema.cpp new file mode 100644 index 
000000000000..c54b7167f53e --- /dev/null +++ b/libevmasm/EthdebugSchema.cpp @@ -0,0 +1,143 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. +*/ +// SPDX-License-Identifier: GPL-3.0 + +#include + +#include +#include + +using namespace solidity; +using namespace solidity::evmasm::ethdebug; + +void schema::data::to_json(Json& _json, HexValue const& _hexValue) +{ + _json = util::toHex(_hexValue.value, util::HexPrefix::Add); +} + +void schema::data::to_json(Json& _json, Unsigned const& _unsigned) +{ + std::visit(util::GenericVisitor{ + [&](HexValue const& _hexValue) { _json = _hexValue; }, + [&](std::uint64_t const _value) { _json = _value; } + }, _unsigned.value); +} + +void schema::materials::to_json(Json& _json, ID const& _id) +{ + std::visit(util::GenericVisitor{ + [&](std::string const& _hexValue) { _json = _hexValue; }, + [&](std::uint64_t const _value) { _json = _value; } + }, _id.value); +} + +void schema::materials::to_json(Json& _json, Reference const& _source) +{ + _json["id"] = _source.id; + if (_source.type) + _json["type"] = *_source.type == Reference::Type::Compilation ? 
"compilation" : "source"; +} + +void schema::materials::to_json(Json& _json, SourceRange::Range const& _range) +{ + _json["length"] = _range.length; + _json["offset"] = _range.offset; +} + + +void schema::materials::to_json(Json& _json, SourceRange const& _sourceRange) +{ + _json["source"] = _sourceRange.source; + if (_sourceRange.range) + _json["range"] = *_sourceRange.range; +} + +void schema::to_json(Json& _json, Program::Contract const& _contract) +{ + if (_contract.name) + _json["name"] = *_contract.name; + _json["definition"] = _contract.definition; +} + +void schema::program::to_json(Json& _json, Context::Variable const& _contextVariable) +{ + auto const numProperties = + _contextVariable.identifier.has_value() + + _contextVariable.declaration.has_value(); + solRequire(numProperties >= 1, EthdebugException, "Context variable has no properties."); + if (_contextVariable.identifier) + { + solRequire(!_contextVariable.identifier->empty(), EthdebugException, "Variable identifier must not be empty."); + _json["identifier"] = *_contextVariable.identifier; + } + if (_contextVariable.declaration) + _json["declaration"] = *_contextVariable.declaration; +} + +void schema::program::to_json(Json& _json, Context const& _context) +{ + solRequire(_context.code.has_value() + _context.remark.has_value() + _context.variables.has_value() >= 1, EthdebugException, "Context needs >=1 properties."); + if (_context.code) + _json["code"] = *_context.code; + if (_context.variables) + { + solRequire(!_context.variables->empty(), EthdebugException, "Context variables must not be empty if provided."); + _json["variables"] = *_context.variables; + } + if (_context.remark) + _json["remark"] = *_context.remark; +} + +void schema::program::to_json(Json& _json, Instruction::Operation const& _operation) +{ + _json = { {"mnemonic", _operation.mnemonic} }; + if (!_operation.arguments.empty()) + _json["arguments"] = _operation.arguments; +} + +void schema::program::to_json(Json& _json, 
Instruction const& _instruction) +{ + _json["offset"] = _instruction.offset; + if (_instruction.operation) + _json["operation"] = *_instruction.operation; + if (_instruction.context) + _json["context"] = *_instruction.context; +} + +void schema::to_json(Json& _json, Program const& _program) +{ + if (_program.compilation) + _json["compilation"] = *_program.compilation; + _json["contract"] = _program.contract; + _json["environment"] = _program.environment; + if (_program.context) + _json["context"] = *_program.context; + _json["instructions"] = _program.instructions; +} + +void schema::to_json(Json& _json, Program::Environment const& _environment) +{ + switch (_environment) + { + case Program::Environment::CALL: + _json = "call"; + break; + case Program::Environment::CREATE: + _json = "create"; + break; + } +} diff --git a/libevmasm/EthdebugSchema.h b/libevmasm/EthdebugSchema.h new file mode 100644 index 000000000000..5fc8f78fd62e --- /dev/null +++ b/libevmasm/EthdebugSchema.h @@ -0,0 +1,172 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see <http://www.gnu.org/licenses/>. 
+*/ +// SPDX-License-Identifier: GPL-3.0 + +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace solidity::evmasm::ethdebug::schema +{ + +struct EthdebugException: virtual util::Exception {}; + +namespace data +{ + +struct HexValue +{ + bytes value; +}; + +struct Unsigned +{ + template + Unsigned(T const _value) + { + solRequire(static_cast(_value) <= std::numeric_limits::max(), EthdebugException, "Too large value."); + value = static_cast(_value); + } + template + Unsigned(T const _value) + { + solRequire(_value >= 0, EthdebugException, "NonNegativeValue got negative value."); + solRequire(static_cast>(_value) <= std::numeric_limits::max(), EthdebugException, "Too large value."); + value = static_cast(_value); + } + Unsigned(HexValue&& _value): value(std::move(_value)) {} + + std::variant value; +}; + +} + +namespace materials +{ + +struct ID +{ + std::variant value; +}; + +struct Reference +{ + enum class Type { Compilation, Source }; + ID id; + std::optional type; +}; + +struct SourceRange +{ + struct Range + { + data::Unsigned length; + data::Unsigned offset; + }; + + Reference source; + std::optional range; +}; + +} + +namespace program +{ + +struct Context +{ + struct Variable + { + std::optional identifier; + std::optional declaration; + // TODO: type + // TODO: pointer according to ethdebug/format/spec/pointer + }; + + std::optional code; + std::optional> variables; + std::optional remark; +}; + +struct Instruction +{ + struct Operation + { + std::string mnemonic; + std::vector arguments; + }; + + data::Unsigned offset; + std::optional operation; + std::optional context; +}; + +} + +struct Program +{ + enum class Environment + { + CALL, CREATE + }; + + struct Contract + { + std::optional name; + materials::SourceRange definition; + }; + + std::optional compilation; + Contract contract; + Environment environment; + std::optional context; + std::vector instructions; +}; + +namespace data +{ +void to_json(Json& _json, 
HexValue const& _hexValue); +void to_json(Json& _json, Unsigned const& _unsigned); +} + +namespace materials +{ +void to_json(Json& _json, ID const& _id); +void to_json(Json& _json, Reference const& _source); +void to_json(Json& _json, SourceRange::Range const& _range); +void to_json(Json& _json, SourceRange const& _sourceRange); +} + +namespace program +{ +void to_json(Json& _json, Context::Variable const& _contextVariable); +void to_json(Json& _json, Context const& _context); +void to_json(Json& _json, Instruction::Operation const& _operation); +void to_json(Json& _json, Instruction const& _instruction); +} + +void to_json(Json& _json, Program::Contract const& _contract); +void to_json(Json& _json, Program::Environment const& _environment); +void to_json(Json& _json, Program const& _program); + +} From f468ca8333a0ab66c6b054d1b8e15efdda9a6e3f Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 9 Apr 2025 15:22:54 +0200 Subject: [PATCH 6/9] Ethdebug uses schema for serialization --- libevmasm/Ethdebug.cpp | 131 ++++++++++++++++---------- libevmasm/Ethdebug.h | 2 +- test/libsolidity/StandardCompiler.cpp | 9 +- 3 files changed, 87 insertions(+), 55 deletions(-) diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 0a4c56ddd0a0..8b87aadd072a 100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -18,6 +18,8 @@ #include +#include + #include using namespace solidity; @@ -27,14 +29,66 @@ using namespace solidity::evmasm::ethdebug; namespace { -std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId, size_t const _codeSectionIndex) +schema::program::Instruction::Operation instructionOperation(Assembly const& _assembly, LinkerObject const& _linkerObject, size_t const _start, size_t const _end) +{ + solAssert(_end <= _linkerObject.bytecode.size()); + solAssert(_start < _end); + schema::program::Instruction::Operation operation; + 
operation.mnemonic = instructionInfo(static_cast(_linkerObject.bytecode[_start]), _assembly.evmVersion()).name; + static size_t constexpr instructionSize = 1; + if (_start + instructionSize < _end) + { + bytes const argumentData( + _linkerObject.bytecode.begin() + static_cast(_start) + instructionSize, + _linkerObject.bytecode.begin() + static_cast(_end) + ); + solAssert(!argumentData.empty()); + operation.arguments = {{schema::data::HexValue{argumentData}}}; + } + return operation; +} + +schema::materials::SourceRange::Range locationRange(langutil::SourceLocation const& _location) +{ + return { + .length = schema::data::Unsigned{_location.end - _location.start}, + .offset = schema::data::Unsigned{_location.start} + }; +} + +schema::materials::Reference sourceReference(unsigned _sourceID) +{ + return { + .id = schema::materials::ID{_sourceID}, + .type = std::nullopt + }; +} + +std::optional instructionContext(Assembly::CodeSection const& _codeSection, size_t _assemblyItemIndex, unsigned _sourceID) +{ + solAssert(_assemblyItemIndex < _codeSection.items.size()); + langutil::SourceLocation const& location = _codeSection.items.at(_assemblyItemIndex).location(); + if (!location.isValid()) + return std::nullopt; + + return schema::program::Context{ + schema::materials::SourceRange{ + .source = sourceReference(_sourceID), + .range = locationRange(location) + }, + std::nullopt, + std::nullopt + }; +} + +std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned const _sourceID, size_t const _codeSectionIndex) { solAssert(_codeSectionIndex < _linkerObject.codeSectionLocations.size()); solAssert(_codeSectionIndex < _assembly.codeSections().size()); auto const& locations = _linkerObject.codeSectionLocations[_codeSectionIndex]; auto const& codeSection = _assembly.codeSections().at(_codeSectionIndex); - std::vector instructions; + std::vector instructions; instructions.reserve(codeSection.items.size()); bool const 
codeSectionContainsVerbatim = ranges::any_of( @@ -52,72 +106,47 @@ std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObjec if (start == end) continue; - size_t const assemblyItemIndex = currentInstruction.assemblyItemIndex; - solAssert(end <= _linkerObject.bytecode.size()); - solAssert(start < end); - solAssert(assemblyItemIndex < codeSection.items.size()); - Json operation = Json::object(); - operation["mnemonic"] = instructionInfo(static_cast(_linkerObject.bytecode[start]), _assembly.evmVersion()).name; - static size_t constexpr instructionSize = 1; - if (start + instructionSize < end) - { - bytes const argumentData( - _linkerObject.bytecode.begin() + static_cast(start) + instructionSize, - _linkerObject.bytecode.begin() + static_cast(end) - ); - solAssert(!argumentData.empty()); - operation["arguments"] = Json::array({util::toHex(argumentData, util::HexPrefix::Add)}); - } - langutil::SourceLocation const& location = codeSection.items.at(assemblyItemIndex).location(); - instructions.emplace_back(Json{ - { "offset", start }, - {"operation", operation }, - { - "context", { - "code", { - "source", { - { "id", static_cast(_sourceId) }, - }, - "range", { - { "offset", location.start }, - { "length", location.end - location.start } - } - } - } - } + instructions.emplace_back(schema::program::Instruction{ + .offset = schema::data::Unsigned{start}, + .operation = instructionOperation(_assembly, _linkerObject, start, end), + .context = instructionContext(codeSection, currentInstruction.assemblyItemIndex, _sourceID) }); } return instructions; } -Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) +std::vector programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned const _sourceID) { auto const numCodeSections = _assembly.codeSections().size(); solAssert(numCodeSections == _linkerObject.codeSectionLocations.size()); - std::vector instructionInfo; + std::vector 
instructionInfo; for (size_t codeSectionIndex = 0; codeSectionIndex < numCodeSections; ++codeSectionIndex) - instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceId, codeSectionIndex); + instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceID, codeSectionIndex); return instructionInfo; } } // anonymous namespace -Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject) +Json ethdebug::program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject) { - Json result = Json::object(); - result["contract"] = Json::object(); - result["contract"]["name"] = _name; - result["contract"]["definition"] = Json::object(); - result["contract"]["definition"]["source"] = Json::object(); - result["contract"]["definition"]["source"]["id"] = _sourceId; - if (_assembly) - { - result["environment"] = _assembly->isCreation() ? "create" : "call"; - result["instructions"] = programInstructions(*_assembly, _linkerObject, _sourceId); - } - return result; + return schema::Program{ + .compilation = std::nullopt, + .contract = { + .name = std::string{_name}, + .definition = { + .source = { + .id = {_sourceID}, + .type = std::nullopt + }, + .range = std::nullopt + } + }, + .environment = _assembly.isCreation() ? schema::Program::Environment::CREATE : schema::Program::Environment::CALL, + .context = std::nullopt, + .instructions = programInstructions(_assembly, _linkerObject, _sourceID) + }; } Json ethdebug::resources(std::vector const& _sources, std::string const& _version) diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h index fdb693b190c4..2e0df3484ba6 100644 --- a/libevmasm/Ethdebug.h +++ b/libevmasm/Ethdebug.h @@ -27,7 +27,7 @@ namespace solidity::evmasm::ethdebug { // returns ethdebug/format/program. 
-Json program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject); +Json program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject); // returns ethdebug/format/info/resources Json resources(std::vector const& _sources, std::string const& _version); diff --git a/test/libsolidity/StandardCompiler.cpp b/test/libsolidity/StandardCompiler.cpp index b17f566613a1..a41fbb26dad0 100644 --- a/test/libsolidity/StandardCompiler.cpp +++ b/test/libsolidity/StandardCompiler.cpp @@ -2249,9 +2249,12 @@ BOOST_DATA_TEST_CASE(ethdebug_output_instructions_smoketest, boost::unit_test::d BOOST_REQUIRE(instruction.contains("offset")); BOOST_REQUIRE(instruction.contains("operation")); BOOST_REQUIRE(instruction["operation"].contains("mnemonic")); - BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("length")); - BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("offset")); - BOOST_REQUIRE(instruction["context"]["code"]["source"].contains("id")); + if (instruction.contains("context")) + { + BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("length")); + BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("offset")); + BOOST_REQUIRE(instruction["context"]["code"]["source"].contains("id")); + } std::string mnemonic = instruction["operation"]["mnemonic"]; if (mnemonic.find("PUSH") != std::string::npos) { From b09315b1bcfe122dd2497f9d91a5f41a294804e2 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 9 Apr 2025 15:23:21 +0200 Subject: [PATCH 7/9] Update test expectations with eof-enabled ethdebug --- .../ethdebug_eof_container_osaka/args | 2 +- .../ethdebug_eof_container_osaka/err | 1 - .../ethdebug_eof_container_osaka/exit | 1 - .../ethdebug_eof_container_osaka/output | 251 +++++++++++++++++- 4 files changed, 251 insertions(+), 4 deletions(-) delete mode 100644 
test/cmdlineTests/ethdebug_eof_container_osaka/err delete mode 100644 test/cmdlineTests/ethdebug_eof_container_osaka/exit diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/args b/test/cmdlineTests/ethdebug_eof_container_osaka/args index 65974c6287da..16466b842d0e 100644 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/args +++ b/test/cmdlineTests/ethdebug_eof_container_osaka/args @@ -1 +1 @@ - --experimental-eof-version 1 --evm-version osaka --ethdebug --via-ir + --experimental-eof-version 1 --evm-version osaka --ethdebug --via-ir --pretty-json --json-indent 4 diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/err b/test/cmdlineTests/ethdebug_eof_container_osaka/err deleted file mode 100644 index 7714685971d2..000000000000 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/err +++ /dev/null @@ -1 +0,0 @@ -Error: ethdebug does not yet support EOF. diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/exit b/test/cmdlineTests/ethdebug_eof_container_osaka/exit deleted file mode 100644 index d00491fd7e5b..000000000000 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/exit +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/output b/test/cmdlineTests/ethdebug_eof_container_osaka/output index b29297ee3749..e9b57d885711 100644 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/output +++ b/test/cmdlineTests/ethdebug_eof_container_osaka/output @@ -1,4 +1,253 @@ ======= Debug Data (ethdebug/format/info/resources) ======= -{"compilation":{"compiler":{"name":"solc","version": ""},"sources":[{"id":0,"path":"input.sol"}]}} +{ + "compilation": { + "compiler": { + "name": "solc", + "version": "" + }, + "sources": [ + { + "id": 0, + "path": "input.sol" + } + ] + } +} ======= input.sol:C ======= +Debug Data (ethdebug/format/program): +{ + "contract": { + "definition": { + "source": { + "id": 0 + } + }, + "name": "C" + }, + "environment": "create", + "instructions": [ + { + "context": { + "code": { + "range": 
{ + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 30, + "operation": { + "arguments": [ + "0x80" + ], + "mnemonic": "PUSH1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 32, + "operation": { + "arguments": [ + "0x40" + ], + "mnemonic": "PUSH1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 34, + "operation": { + "mnemonic": "MSTORE" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 35, + "operation": { + "mnemonic": "CALLVALUE" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 36, + "operation": { + "arguments": [ + "0x0005" + ], + "mnemonic": "RJUMPI" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 39, + "operation": { + "mnemonic": "PUSH0" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 40, + "operation": { + "arguments": [ + "0x80" + ], + "mnemonic": "PUSH1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 42, + "operation": { + "arguments": [ + "0x00" + ], + "mnemonic": "RETURNCONTRACT" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 44, + "operation": { + "arguments": [ + "0x0001" + ], + "mnemonic": "JUMPF" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 47, + "operation": { + "mnemonic": "PUSH0" + } + }, + { + "context": { + 
"code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 48, + "operation": { + "mnemonic": "DUP1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 49, + "operation": { + "mnemonic": "REVERT" + } + } + ] +} From f9a098508f67e0b2979717352abece7571cb4522 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Fri, 11 Apr 2025 11:37:46 +0200 Subject: [PATCH 8/9] Test ethdebug output when requested in standard json under EOF --- .../args | 1 + .../in.yul | 16 +++++++ .../input.json | 18 ++++++++ .../output.json | 42 +++++++++++++++++++ .../strip-ethdebug | 0 .../standard_yul_ethdebug_eof/args | 1 + .../standard_yul_ethdebug_eof/in.yul | 17 ++++++++ .../standard_yul_ethdebug_eof/input.json | 13 ++++++ .../standard_yul_ethdebug_eof/output.json | 42 +++++++++++++++++++ .../standard_yul_ethdebug_eof/strip-ethdebug | 0 10 files changed, 150 insertions(+) create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/strip-ethdebug create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/args create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/in.yul create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/input.json create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/output.json create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/strip-ethdebug diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args 
b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args new file mode 100644 index 000000000000..18532c5a6d3f --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args @@ -0,0 +1 @@ +--allow-paths . diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul new file mode 100644 index 000000000000..aa564d00ce86 --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul @@ -0,0 +1,16 @@ +/// @use-src 0:"input.sol" +object "C_6_deployed" { + code { + /// @src 0:60:101 "contract C {..." + mstore(64, 128) + + // f() + fun_f_5() + + /// @src 0:77:99 "function f() public {}" + function fun_f_5() { + sstore(0, 42) + } + /// @src 0:60:101 "contract C {..." + } +} diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json new file mode 100644 index 000000000000..a8c3436c1a0f --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json @@ -0,0 +1,18 @@ +{ + "language": "Yul", + "sources": { + "C": { + "urls": [ + "in.yul" + ] + } + }, + "settings": { + "eofVersion": 1, + "evmVersion": "osaka", + "debug": {"debugInfo": ["ethdebug"]}, + "outputSelection": { + "*": {"*": ["ir", "irOptimized", "evm.bytecode.ethdebug"]} + } + } +} diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json new file mode 100644 index 000000000000..2b2f395fadd4 --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json @@ -0,0 +1,42 @@ +{ + "contracts": { + "C": { + "C_6_deployed": { + "evm": { + "bytecode": { + "ethdebug": "" + } + }, + "ir": "/// 
ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + /// @src 0:60:101 + mstore(64, 128) + fun_f_5() + /// @src 0:77:99 + function fun_f_5() + { sstore(0, 42) } + } +} +", + "irOptimized": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + { + /// @src 0:60:101 + mstore(64, 128) + fun_f() + } + /// @src 0:77:99 + function fun_f() + { sstore(0, 42) } + } +} +" + } + } + }, + "ethdebug": "" +} diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/strip-ethdebug b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/strip-ethdebug new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/args b/test/cmdlineTests/standard_yul_ethdebug_eof/args new file mode 100644 index 000000000000..18532c5a6d3f --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/args @@ -0,0 +1 @@ +--allow-paths . diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/in.yul b/test/cmdlineTests/standard_yul_ethdebug_eof/in.yul new file mode 100644 index 000000000000..920aef8e9dc2 --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/in.yul @@ -0,0 +1,17 @@ +/// @use-src 0:"input.sol" +object "C_6_deployed" { + code { + /// @src 0:60:101 "contract C {..." + mstore(64, 128) + + // f() + fun_f_5() + + /// @src 0:77:99 "function f() public {}" + function fun_f_5() { + sstore(0, 42) + } + /// @src 0:60:101 "contract C {..." 
+ } +} + diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/input.json b/test/cmdlineTests/standard_yul_ethdebug_eof/input.json new file mode 100644 index 000000000000..4e33c42670a6 --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/input.json @@ -0,0 +1,13 @@ +{ + "language": "Yul", + "sources": { + "C": {"urls": ["in.yul"]} + }, + "settings": { + "eofVersion": 1, + "evmVersion": "osaka", + "outputSelection": { + "*": {"*": ["evm.bytecode.ethdebug", "evm.deployedBytecode.ethdebug", "ir", "irOptimized"]} + } + } +} diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/output.json b/test/cmdlineTests/standard_yul_ethdebug_eof/output.json new file mode 100644 index 000000000000..2b2f395fadd4 --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/output.json @@ -0,0 +1,42 @@ +{ + "contracts": { + "C": { + "C_6_deployed": { + "evm": { + "bytecode": { + "ethdebug": "" + } + }, + "ir": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + /// @src 0:60:101 + mstore(64, 128) + fun_f_5() + /// @src 0:77:99 + function fun_f_5() + { sstore(0, 42) } + } +} +", + "irOptimized": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + { + /// @src 0:60:101 + mstore(64, 128) + fun_f() + } + /// @src 0:77:99 + function fun_f() + { sstore(0, 42) } + } +} +" + } + } + }, + "ethdebug": "" +} diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/strip-ethdebug b/test/cmdlineTests/standard_yul_ethdebug_eof/strip-ethdebug new file mode 100644 index 000000000000..e69de29bb2d1 From dbb992800287451dfd692a8cc09a4fb739d9733a Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 16 Apr 2025 14:18:26 +0200 Subject: [PATCH 9/9] Compare ethdebug output to program schema --- .circleci/config.yml | 18 +++++++ test/ethdebugSchemaTests/conftest.py | 49 ++++++++++++++++++ test/ethdebugSchemaTests/input_file.json | 27 ++++++++++ 
test/ethdebugSchemaTests/input_file_eof.json | 29 +++++++++++ .../test_ethdebug_schema_conformity.py | 51 +++++++++++++++++++ 5 files changed, 174 insertions(+) create mode 100644 test/ethdebugSchemaTests/conftest.py create mode 100644 test/ethdebugSchemaTests/input_file.json create mode 100644 test/ethdebugSchemaTests/input_file_eof.json create mode 100755 test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 09b1281c5630..f4cf08bf6b25 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -951,6 +951,8 @@ jobs: chk_pylint: <<: *base_ubuntu2404_small steps: + - install_python3: + packages: pyyaml jsonschema pytest - checkout - run: pylint --version - run: @@ -1534,6 +1536,19 @@ jobs: - reports/externalTests/ - matrix_notify_failure_unless_pr + t_ethdebug_output_validity: + <<: *base_node_small + steps: + - checkout + - attach_workspace: + at: /tmp/workspace + - install_python3: + packages: pyyaml jsonschema pytest + - run: + name: Ethdebug validity tests + command: | + pytest test/ethdebugSchemaTests --solc-binary-path=/tmp/workspace/solc/solc-static-linux -v + c_ext_benchmarks: <<: *base_node_small steps: @@ -1927,6 +1942,9 @@ workflows: #- t_ext: *job_native_compile_ext_chainlink #- t_ext: *job_native_compile_ext_bleeps + - t_ethdebug_output_validity: + <<: *requires_b_ubu_static + - c_ext_benchmarks: <<: *requires_nothing requires: diff --git a/test/ethdebugSchemaTests/conftest.py b/test/ethdebugSchemaTests/conftest.py new file mode 100644 index 000000000000..11b43bb4a16b --- /dev/null +++ b/test/ethdebugSchemaTests/conftest.py @@ -0,0 +1,49 @@ +import shutil +import subprocess +from pathlib import Path + +import pytest +import referencing +import yaml +from referencing.jsonschema import DRAFT202012 + + +def pytest_addoption(parser): + parser.addoption("--solc-binary-path", type=Path, required=True, help="Path to the solidity compiler binary.") + + +@pytest.fixture +def 
solc_path(request): + solc_path = request.config.getoption("--solc-binary-path") + assert solc_path.is_file() + assert solc_path.exists() + return solc_path + + +@pytest.fixture(scope="module") +def ethdebug_clone_dir(tmpdir_factory): + temporary_dir = Path(tmpdir_factory.mktemp("data")) + yield temporary_dir + shutil.rmtree(temporary_dir) + + +@pytest.fixture(scope="module") +def ethdebug_schema_repository(ethdebug_clone_dir): + process = subprocess.run( + ["git", "clone", "https://github.com/ethdebug/format.git", ethdebug_clone_dir], + encoding='utf8', + capture_output=True, + check=True + ) + assert process.returncode == 0 + + registry = referencing.Registry() + for path in (ethdebug_clone_dir / "schemas").rglob("*.yaml"): + with open(path, "r", encoding="utf8") as f: + schema = yaml.safe_load(f) + if "$id" in schema: + resource = referencing.Resource.from_contents(schema, DRAFT202012) + registry = resource @ registry + else: + raise ValueError(f"Schema did not define an $id: {path}") + return registry diff --git a/test/ethdebugSchemaTests/input_file.json b/test/ethdebugSchemaTests/input_file.json new file mode 100644 index 000000000000..7daf7afd6852 --- /dev/null +++ b/test/ethdebugSchemaTests/input_file.json @@ -0,0 +1,27 @@ +{ + "language": "Solidity", + "sources": { + "a.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function a(uint x) public pure { assert(x > 0); } } contract A2 { function a(uint x) public pure { assert(x > 0); } }" + }, + "b.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function b(uint x) public pure { assert(x > 0); } } contract B2 { function b(uint x) public pure { assert(x > 0); } }" + } + }, + "settings": { + "viaIR": true, + "debug": { + "debugInfo": [ + "ethdebug" + ] + }, + "outputSelection": { + "*": { + "*": [ + "evm.bytecode.ethdebug", + "evm.deployedBytecode.ethdebug" + ] + } + } + } +} diff --git 
a/test/ethdebugSchemaTests/input_file_eof.json b/test/ethdebugSchemaTests/input_file_eof.json new file mode 100644 index 000000000000..0462065327f8 --- /dev/null +++ b/test/ethdebugSchemaTests/input_file_eof.json @@ -0,0 +1,29 @@ +{ + "language": "Solidity", + "sources": { + "a.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function a(uint x) public pure { assert(x > 0); } } contract A2 { function a(uint x) public pure { assert(x > 0); } }" + }, + "b.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function b(uint x) public pure { assert(x > 0); } } contract B2 { function b(uint x) public pure { assert(x > 0); } }" + } + }, + "settings": { + "eofVersion": 1, + "evmVersion": "osaka", + "viaIR": true, + "debug": { + "debugInfo": [ + "ethdebug" + ] + }, + "outputSelection": { + "*": { + "*": [ + "evm.bytecode.ethdebug", + "evm.deployedBytecode.ethdebug" + ] + } + } + } +} diff --git a/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py b/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py new file mode 100755 index 000000000000..ae1ab92df4e7 --- /dev/null +++ b/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +import json +import subprocess +from pathlib import Path + +import jsonschema +import pytest + + +def get_nested_value(dictionary, *keys): + for key in keys: + dictionary = dictionary[key] + return dictionary + + +@pytest.fixture(params=["input_file.json", "input_file_eof.json"]) +def solc_output(request, solc_path): + testfile_dir = Path(__file__).parent + with open(testfile_dir / request.param, "r", encoding="utf8") as f: + source = json.load(f) + + process = subprocess.run( + [solc_path, "--standard-json"], + input=json.dumps(source), + encoding='utf8', + capture_output=True, + check=True, + ) + assert process.returncode == 0 + return json.loads(process.stdout) + + 
+@pytest.mark.parametrize("output_selection", ["evm.bytecode.ethdebug", "evm.deployedBytecode.ethdebug"], ids=str) +def test_program_schema( + output_selection, + ethdebug_schema_repository, + solc_output +): + validator = jsonschema.Draft202012Validator( + schema={"$ref": "schema:ethdebug/format/program"}, + registry=ethdebug_schema_repository + ) + assert "contracts" in solc_output + for contract in solc_output["contracts"].keys(): + contract_output = solc_output["contracts"][contract] + assert len(contract_output) > 0 + for source in contract_output.keys(): + source_output = contract_output[source] + ethdebug_data = get_nested_value(source_output, *(output_selection.split("."))) + validator.validate(ethdebug_data)