diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt index 6fab09b1bc2..eb15aa0b7d1 100644 --- a/src/passes/CMakeLists.txt +++ b/src/passes/CMakeLists.txt @@ -49,6 +49,7 @@ set(passes_SOURCES hash-stringify-walker.cpp Outlining.cpp Heap2Local.cpp + HeapSnapshotAnalysis.cpp HeapStoreOptimization.cpp I64ToI32Lowering.cpp Inlining.cpp diff --git a/src/passes/HeapSnapshotAnalysis.cpp b/src/passes/HeapSnapshotAnalysis.cpp new file mode 100644 index 00000000000..62ac27db06e --- /dev/null +++ b/src/passes/HeapSnapshotAnalysis.cpp @@ -0,0 +1,329 @@ +/* + * Copyright 2025 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/module-utils.h" +#include "pass.h" +#include "support/file.h" +#include "support/json.h" +#include "wasm.h" + +namespace wasm { + +constexpr Index NodeTypeIndex = 0; +constexpr Index NodeNameIndex = 1; +constexpr Index NodeIdIndex = 2; +constexpr Index NodeSelfSizeIndex = 3; +constexpr Index NodeEdgeCountIndex = 4; +constexpr Index NodeDetachednessIndex = 5; +constexpr Index NodeFieldCount = 6; + +constexpr Index EdgeTypeIndex = 0; +constexpr Index EdgeNameIndex = 1; +constexpr Index EdgeDestIndex = 2; +constexpr Index EdgeFieldCount = 3; + +constexpr Index DisplayedTypesCount = 10; + +struct Node { + IString name; + Index size; + std::vector edges; +}; + +struct HeapSnapshotAnalysis : Pass { + void run(Module* wasm) override { + auto snapshotFile = + getArgument("heap-snapshot-analysis", + "Heap snapshot analysis requires heap shapshot " + "file argument [--heap-snapshot-analysis=FILE]"); + auto snapshotData = read_file>(snapshotFile, Flags::Text); + std::cout << "snapshot length " << snapshotData.size() << "\n"; + + // TODO: We would prefer UTF-8. + json::Value snapshot; + snapshot.parse(snapshotData.data(), json::Value::StringEncoding::ASCII); + + std::cout << "parsed snapshot\n"; + + validateSnapshotMeta(snapshot); + auto nodes = getGraph(snapshot); + + Index totalHeapSize = 0; + Index totalWasmHeapSize = 0; + Index totalWasmAllocations = 0; + std::unordered_map heapSizeByType; + std::unordered_map allocationsByType; + for (auto& node : nodes) { + totalHeapSize += node.size; + if (maybeGetWasmTypeIndex(node.name)) { + totalWasmHeapSize += node.size; + heapSizeByType[node.name] += node.size; + ++totalWasmAllocations; + ++allocationsByType[node.name]; + } + } + auto sortData = [](auto begin, auto end) { + std::vector> pairs(begin, end); + std::sort(pairs.begin(), pairs.end(), [](auto a, auto b) { + if (a.second != b.second) { + return a.second > b.second; + } + return a.first < b.first; + }); + return pairs; + }; + + auto topTypesByHeapSize = + sortData(heapSizeByType.begin(), heapSizeByType.end()); + auto topTypesByAllocations = + sortData(allocationsByType.begin(), allocationsByType.end()); + + std::cout << "total heap size is " << totalHeapSize << "\n"; + std::cout << "wasm heap size is " << totalWasmHeapSize << " (" + << (double(totalWasmHeapSize) / double(totalHeapSize) * 100) + << "%)\n"; + std::cout << "there are " << totalWasmAllocations + << " wasm objects (average " + << (double(totalWasmHeapSize) / double(totalWasmAllocations)) + << " bytes per object)\n"; + + std::cout << "\ntop types by total heap size:\n"; + for (Index i = 0; i < DisplayedTypesCount; ++i) { + if (i >= topTypesByHeapSize.size()) { + break; + } + auto [name, size] = topTypesByHeapSize[i]; + std::cout << " " << name << ": " << size << " bytes\n"; + } + + std::cout << "\ntop types by allocations:\n"; + for (Index i = 0; i < DisplayedTypesCount; ++i) { + if (i >= topTypesByAllocations.size()) { + break; + } + auto [name, allocs] = topTypesByAllocations[i]; + std::cout << " " << name << ": " << allocs << " objects\n"; + } + } + + void validateSnapshotMeta(json::Value& snapshotRoot) { + auto snapshot = snapshotRoot.maybeGet("snapshot"); + if (!snapshot) { + Fatal() << "Expected .snapshot to exist"; + } + + auto meta = snapshot->maybeGet("meta"); + if (!meta) { + Fatal() << "Expected .snapshot.meta to exist"; + } + + validateNodeFields(meta); + validateEdgeFields(meta); + } + + void validateNodeFields(json::Ref& meta) { + auto nodeFields = meta->maybeGet("node_fields"); + if (!nodeFields) { + Fatal() << "Expected .snapshot.meta.node_fields to exist"; + } + if (!nodeFields->isArray()) { + Fatal() << "Expected .snapshot.meta.node_fields to be an array"; + } + + auto& nodeFieldsArray = nodeFields->getArray(); + + auto expectNodeField = [&](unsigned i, IString field) { + if (nodeFieldsArray.size() <= i || !nodeFieldsArray[i]->isString() || + nodeFieldsArray[i]->getIString() != field) { + Fatal() << "Expected .snapshot.meta.node_fields[" << i << "] to be \"" + << field << '"'; + } + }; + + expectNodeField(NodeTypeIndex, "type"); + expectNodeField(NodeNameIndex, "name"); + expectNodeField(NodeIdIndex, "id"); + expectNodeField(NodeSelfSizeIndex, "self_size"); + expectNodeField(NodeEdgeCountIndex, "edge_count"); + expectNodeField(NodeDetachednessIndex, "detachedness"); + if (nodeFieldsArray.size() != NodeFieldCount) { + Fatal() << "Unexpected fields in .snapshot.meta.node_fields"; + } + } + + void validateEdgeFields(json::Ref& meta) { + json::Ref edgeFields = meta->maybeGet("edge_fields"); + if (!edgeFields) { + Fatal() << "Expected .snapshot.meta.edge_fields to exist"; + } + if (!edgeFields->isArray()) { + Fatal() << "Expected .snapshot.meta.edge_fields to be an array"; + } + + auto& edgeFieldsArray = edgeFields->getArray(); + + auto expectEdgeField = [&](unsigned i, IString field) { + if (edgeFieldsArray.size() <= i || !edgeFieldsArray[i]->isString() || + edgeFieldsArray[i]->getIString() != field) { + Fatal() << "Expected .snapshot.meta.edge_fields[" << i << "] to be \"" + << field << '"'; + } + }; + + expectEdgeField(EdgeTypeIndex, "type"); + expectEdgeField(EdgeNameIndex, "name_or_index"); + expectEdgeField(EdgeDestIndex, "to_node"); + if (edgeFieldsArray.size() != EdgeFieldCount) { + Fatal() << "Unexpected fields in .snapshot.meta.edge_fields"; + } + } + + std::vector getGraph(json::Value& snapshotRoot) { + auto nodes = getNodes(snapshotRoot); + auto edges = getEdges(snapshotRoot); + auto strings = getStrings(snapshotRoot); + + if (nodes.size() % NodeFieldCount != 0) { + Fatal() << "Expected length of .nodes to be a multiple of " + << NodeFieldCount; + } + if (edges.size() % EdgeFieldCount != 0) { + Fatal() << "Expected length of .edges to be a multiple of " + << EdgeFieldCount; + } + + std::vector results; + Index currNode = 0, currEdge = 0; + for (; currNode < nodes.size(); currNode += NodeFieldCount) { + Index nameIndex = nodes[currNode + NodeNameIndex]; + if (nameIndex >= strings.size()) { + Fatal() << "Node name index " << nameIndex + << " is out of bounds at .nodes[" << (currNode + NodeNameIndex) + << ']'; + } + + IString name = strings[nameIndex]; + Index size = nodes[currNode + NodeSelfSizeIndex]; + + Index edgeCount = nodes[currNode + NodeEdgeCountIndex]; + Index edgeFieldCount = edgeCount * EdgeFieldCount; + if (edgeFieldCount < edgeCount || edgeFieldCount > edges.size() || + currEdge > edges.size() - edgeFieldCount) { + Fatal() << "Edge count " << edgeCount << " is out of bounds at .nodes[" + << (currNode + NodeEdgeCountIndex) << ']'; + } + std::vector destinations; + destinations.reserve(edgeCount); + for (Index i = 0; i < edgeCount; ++i) { + Index dest = edges[currEdge + EdgeDestIndex]; + if (dest % NodeFieldCount != 0) { + Fatal() << "Expected to_node index to be a multiple of " + << NodeFieldCount << "\n"; + } + dest /= NodeFieldCount; + destinations.push_back(dest); + currEdge += EdgeFieldCount; + } + results.emplace_back(Node{name, size, std::move(destinations)}); + } + if (currEdge != edges.size()) { + Fatal() << "Unexpected extra edges"; + } + return results; + } + + std::vector getNodes(json::Value& snapshotRoot) { + auto nodes = snapshotRoot.maybeGet("nodes"); + if (!nodes) { + Fatal() << "Expected .nodes to exist"; + } + return getIndices(nodes, ".nodes"); + } + + std::vector getEdges(json::Value& snapshotRoot) { + auto edges = snapshotRoot.maybeGet("edges"); + if (!edges) { + Fatal() << "Expected .edges to exist"; + } + return getIndices(edges, ".edges"); + } + + std::vector getIndices(json::Ref& array, const char* name) { + if (!array->isArray()) { + Fatal() << "Expected " << name << " to be an array"; + } + auto& refs = array->getArray(); + + std::vector results; + for (size_t i = 0; i < refs.size(); ++i) { + if (!refs[i]->isNumber()) { + Fatal() << "Expected " << name << "[" << i << "] to be a number"; + } + // TODO: Bounds checks, etc. + results.push_back(Index(refs[i]->getNumber())); + } + + return results; + } + + std::vector getStrings(json::Value& snapshotRoot) { + auto strings = snapshotRoot.maybeGet("strings"); + if (!strings) { + Fatal() << "Expected .strings to exist"; + } + if (!strings->isArray()) { + Fatal() << "Expected .strings to be an array"; + } + auto& stringRefs = strings->getArray(); + + std::vector results; + for (size_t i = 0; i < stringRefs.size(); ++i) { + if (!stringRefs[i]->isString()) { + Fatal() << "Expected .strings[" << i << "] to be a string"; + } + results.push_back(stringRefs[i]->getIString()); + } + return results; + } + + std::optional maybeGetWasmTypeIndex(IString name) { + // Nodes for Wasm heap objects have names like `$canonNN (wasm)`. Parse + // and return the NN as an index if the name matches that format. + constexpr std::string_view prefix = "$canon"; + constexpr std::string_view suffix = " (wasm)"; + // TODO: Use C++20 `starts_with` + if (name.str.substr(0, prefix.size()) != prefix) { + return std::nullopt; + } + if (name.str.substr(name.str.size() - suffix.size()) != suffix) { + return std::nullopt; + } + size_t count = name.str.size() - prefix.size() - suffix.size(); + std::string_view index = name.str.substr(prefix.size(), count); + Index result = 0; + for (char c : index) { + if (!std::isdigit(c)) { + return std::nullopt; + } + result = result * 10 + (c - '0'); + } + return result; + } +}; + +Pass* createHeapSnapshotAnalysisPass() { return new HeapSnapshotAnalysis(); } + +} // namespace wasm diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp index 2042bc71d3a..739cd779572 100644 --- a/src/passes/pass.cpp +++ b/src/passes/pass.cpp @@ -220,6 +220,10 @@ void PassRegistry::registerPasses() { createTypeRefiningGUFAPass); registerPass( "heap2local", "replace GC allocations with locals", createHeap2LocalPass); + registerPass("heap-snapshot-analysis", + "Ingest a Chrome DevTools heap snapshot and analysize the " + "memory usage due to each type", + createHeapSnapshotAnalysisPass); registerPass("heap-store-optimization", "optimize heap (GC) stores", createHeapStoreOptimizationPass); diff --git a/src/passes/passes.h b/src/passes/passes.h index e051e466e72..10a6ea918e5 100644 --- a/src/passes/passes.h +++ b/src/passes/passes.h @@ -63,6 +63,7 @@ Pass* createGUFAPass(); Pass* createGUFACastAllPass(); Pass* createGUFAOptimizingPass(); Pass* createHeap2LocalPass(); +Pass* createHeapSnapshotAnalysisPass(); Pass* createHeapStoreOptimizationPass(); Pass* createI64ToI32LoweringPass(); Pass* createInlineMainPass(); diff --git a/src/support/json.h b/src/support/json.h index 1c8f2bf180f..2f128221915 100644 --- a/src/support/json.h +++ b/src/support/json.h @@ -273,10 +273,21 @@ struct Value { // Start |close| at the opening ", and in the loop below we will always // begin looking at the first character after. char* close = curr; - // Skip escaped " - do { + // Skip escaped ". An even number of \ means we're not escaped. + while (true) { close = strchr(close + 1, '"'); - } while (*(close - 1) == '\\'); + bool escaped = false; + for (char* p = close - 1; p > curr; --p) { + if (*p == '\\') { + escaped = !escaped; + } else { + break; + } + } + if (!escaped) { + break; + } + } assert(close); *close = 0; // end this string, and reuse it straight from the input char* raw = curr + 1; @@ -426,6 +437,16 @@ struct Value { return obj->count(x) > 0; } + Ref maybeGet(IString x) { + if (!isObject()) { + return {}; + } + if (auto it = obj->find(x); it != obj->end()) { + return it->second; + } + return {}; + } + private: // Unescape the input (UTF8) string into one of our internal strings (WTF16). void unescapeIntoWTF16(char* str) {