-
Notifications
You must be signed in to change notification settings - Fork 394
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Convert Luau heap dumps to Chrome heap snapshots (#1554)
Adds a script for (approximately) converting Luau heap dumps to Chrome heap snapshots. Useful for visually inspecting a heap dump within Chrome's UI.
- Loading branch information
1 parent
d0222bb
commit 8b8118b
Showing
1 changed file
with
221 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
#!/usr/bin/python3 | ||
# This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details | ||
|
||
# Given a Luau heap dump, this tool generates a heap snapshot which can be imported by Chrome's DevTools Memory panel | ||
# To generate a snapshot, use luaC_dump, ideally preceded by luaC_fullgc | ||
# To import in Chrome, ensure the snapshot has the .heapsnapshot extension and go to: Inspect -> Memory -> Load Profile | ||
# A reference for the heap snapshot schema can be found here: https://learn.microsoft.com/en-us/microsoft-edge/devtools-guide-chromium/memory-problems/heap-snapshot-schema | ||
|
||
# Usage: python3 heapsnapshot.py luauDump.json heapSnapshot.heapsnapshot | ||
|
||
import json | ||
import sys | ||
|
||
# Header describing the snapshot format, copied from a real Chrome heap snapshot | ||
snapshotMeta = { | ||
"node_fields": ["type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness"], | ||
"node_types": [ | ||
["hidden", "array", "string", "object", "code", "closure", "regexp", "number", "native", "synthetic", "concatenated string", "sliced string", "symbol", "bigint", "object shape"], | ||
"string", "number", "number", "number", "number", "number" | ||
], | ||
"edge_fields": ["type", "name_or_index", "to_node"], | ||
"edge_types": [ | ||
["context", "element", "property", "internal", "hidden", "shortcut", "weak"], | ||
"string_or_number", "node" | ||
], | ||
"trace_function_info_fields": ["function_id", "name", "script_name", "script_id", "line", "column"], | ||
"trace_node_fields": ["id", "function_info_index", "count", "size", "children"], | ||
"sample_fields": ["timestamp_us", "last_assigned_id"], | ||
"location_fields": ["object_index", "script_id", "line", "column"], | ||
} | ||
|
||
# These indices refer to the index in the snapshot's metadata header | ||
nodeTypeToMetaIndex = {type: i for i, type in enumerate(snapshotMeta["node_types"][0])} | ||
edgeTypeToMetaIndex = {type: i for i, type in enumerate(snapshotMeta["edge_types"][0])} | ||
|
||
nodeFieldCount = len(snapshotMeta["node_fields"]) | ||
edgeFieldCount = len(snapshotMeta["edge_fields"]) | ||
|
||
|
||
def readAddresses(data): | ||
# Ordered list of addresses to ensure the registry is the first node, and also so we can process nodes in index order | ||
addresses = [] | ||
addressToNodeIndex = {} | ||
|
||
def addAddress(address): | ||
assert address not in addressToNodeIndex, f"Address already exists in the snapshot: '{address}'" | ||
addresses.append(address) | ||
addressToNodeIndex[address] = len(addresses) - 1 | ||
|
||
# The registry is a special case that needs to be either the first or last node to ensure gc "distances" are calculated correctly | ||
registryAddress = data["roots"]["registry"] | ||
addAddress(registryAddress) | ||
|
||
for address, obj in data["objects"].items(): | ||
if address == registryAddress: | ||
continue | ||
addAddress(address) | ||
|
||
return addresses, addressToNodeIndex | ||
|
||
|
||
def convertToSnapshot(data): | ||
addresses, addressToNodeIndex = readAddresses(data) | ||
|
||
# Some notable idiosyncrasies with the heap snapshot format: | ||
# 1. The snapshot format contains a flat array of nodes and edges. Oddly, edges must reference the "absolute" index of a node's first element after flattening. | ||
# 2. A node's outgoing edges are implicitly represented by a contiguous block of edges in the edges array which correspond to the node's position | ||
# in the nodes array and its edge count. So if the first node has 3 edges, the first 3 edges in the edges array are its edges, and so on. | ||
|
||
nodes = [] | ||
edges = [] | ||
strings = [] | ||
|
||
stringToSnapshotIndex = {} | ||
|
||
def getUniqueId(address): | ||
# TODO: we should hash this to an int32 instead of using the address directly | ||
# Addresses are hexadecimal strings | ||
return int(address, 16) | ||
|
||
def addNode(node): | ||
assert len(node) == nodeFieldCount, f"Expected {nodeFieldCount} fields, got {len(node)}" | ||
nodes.append(node) | ||
|
||
def addEdge(edge): | ||
assert len(edge) == edgeFieldCount, f"Expected {edgeFieldCount} fields, got {len(edge)}" | ||
edges.append(edge) | ||
|
||
def getStringSnapshotIndex(string): | ||
assert isinstance(string, str), f"'{string}' is not of type string" | ||
if string not in stringToSnapshotIndex: | ||
strings.append(string) | ||
stringToSnapshotIndex[string] = len(strings) - 1 | ||
return stringToSnapshotIndex[string] | ||
|
||
def getNodeSnapshotIndex(address): | ||
# This is the index of the first element of the node in the flattened nodes array | ||
return addressToNodeIndex[address] * nodeFieldCount | ||
|
||
for address in addresses: | ||
obj = data["objects"][address] | ||
edgeCount = 0 | ||
|
||
if obj["type"] == "table": | ||
# TODO: support weak references | ||
name = f"Registry ({address})" if address == data["roots"]["registry"] else f"Luau table ({address})" | ||
if "pairs" in obj: | ||
for i in range(0, len(obj["pairs"]), 2): | ||
key = obj["pairs"][i] | ||
value = obj["pairs"][i + 1] | ||
if key is None and value is None: | ||
# Both the key and value are value types, nothing meaningful to add here | ||
continue | ||
elif key is None: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["property"], getStringSnapshotIndex("(Luau table key value type)"), getNodeSnapshotIndex(value)]) | ||
elif value is None: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'Luau table key ref: {data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(key)]) | ||
elif data["objects"][key]["type"] == "string": | ||
edgeCount += 2 | ||
# This is a special case where the key is a string, so we can use it as the edge name | ||
addEdge([edgeTypeToMetaIndex["property"], getStringSnapshotIndex(data["objects"][key]["data"]), getNodeSnapshotIndex(value)]) | ||
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'Luau table key ref: {data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(key)]) | ||
else: | ||
edgeCount += 2 | ||
addEdge([edgeTypeToMetaIndex["property"], getStringSnapshotIndex(f'{data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(value)]) | ||
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'Luau table key ref: {data["objects"][key]["type"]} ({key})'), getNodeSnapshotIndex(key)]) | ||
if "array" in obj: | ||
for i, element in enumerate(obj["array"]): | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["element"], i, getNodeSnapshotIndex(element)]) | ||
if "metatable" in obj: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'metatable ({obj["metatable"]})'), getNodeSnapshotIndex(obj["metatable"])]) | ||
# TODO: consider distinguishing "object" and "array" node types | ||
addNode([nodeTypeToMetaIndex["object"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0]) | ||
elif obj["type"] == "thread": | ||
name = f'Luau thread: {obj["source"]}:{obj["line"]} ({address})' if "source" in obj else f"Luau thread ({address})" | ||
if address == data["roots"]["mainthread"]: | ||
name += " (main thread)" | ||
if "env" in obj: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'env ({obj["env"]})'), getNodeSnapshotIndex(obj["env"])]) | ||
if "stack" in obj: | ||
for i, frame in enumerate(obj["stack"]): | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f"callstack[{i}]"), getNodeSnapshotIndex(frame)]) | ||
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0]) | ||
elif obj["type"] == "function": | ||
name = f'Luau function: {obj["name"]} ({address})' if "name" in obj else f"Luau anonymous function ({address})" | ||
if "env" in obj: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'env ({obj["env"]})'), getNodeSnapshotIndex(obj["env"])]) | ||
if "proto" in obj: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'proto ({obj["proto"]})'), getNodeSnapshotIndex(obj["proto"])]) | ||
if "upvalues" in obj: | ||
for i, upvalue in enumerate(obj["upvalues"]): | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f"up value ({upvalue})"), getNodeSnapshotIndex(upvalue)]) | ||
addNode([nodeTypeToMetaIndex["closure"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0]) | ||
elif obj["type"] == "upvalue": | ||
if "object" in obj: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(f'upvalue object ({obj["object"]})'), getNodeSnapshotIndex(obj["object"])]) | ||
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(f"Luau upvalue ({address})"), getUniqueId(address), obj["size"], edgeCount, 0, 0]) | ||
elif obj["type"] == "userdata": | ||
if "metatable" in obj: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["internal"], getStringSnapshotIndex(f'metatable ({obj["metatable"]})'), getNodeSnapshotIndex(obj["metatable"])]) | ||
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(f"Luau userdata ({address})"), getUniqueId(address), obj["size"], edgeCount, 0, 0]) | ||
elif obj["type"] == "proto": | ||
name = f'Luau proto: {obj["source"]}:{obj["line"]} ({address})' if "source" in obj else f"Luau proto ({address})" | ||
if "constants" in obj: | ||
for constant in obj["constants"]: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(constant), getNodeSnapshotIndex(constant)]) | ||
if "protos" in obj: | ||
for proto in obj["protos"]: | ||
edgeCount += 1 | ||
addEdge([edgeTypeToMetaIndex["context"], getStringSnapshotIndex(proto), getNodeSnapshotIndex(proto)]) | ||
addNode([nodeTypeToMetaIndex["code"], getStringSnapshotIndex(name), getUniqueId(address), obj["size"], edgeCount, 0, 0]) | ||
elif obj["type"] == "string": | ||
addNode([nodeTypeToMetaIndex["string"], getStringSnapshotIndex(obj["data"]), getUniqueId(address), obj["size"], 0, 0, 0]) | ||
elif obj["type"] == "buffer": | ||
addNode([nodeTypeToMetaIndex["native"], getStringSnapshotIndex(f'buffer ({address})'), getUniqueId(address), obj["size"], 0, 0, 0]) | ||
else: | ||
raise Exception(f"Unknown object type: '{obj['type']}'") | ||
|
||
return { | ||
"snapshot": { | ||
"meta": snapshotMeta, | ||
"node_count": len(nodes), | ||
"edge_count": len(edges), | ||
"trace_function_count": 0, | ||
}, | ||
# flatten the nodes and edges arrays | ||
"nodes": [field for node in nodes for field in node], | ||
"edges": [field for edge in edges for field in edge], | ||
"trace_function_infos": [], | ||
"trace_tree": [], | ||
"samples": [], | ||
"locations": [], | ||
"strings": strings, | ||
} | ||
|
||
|
||
if __name__ == "__main__": | ||
luauDump = sys.argv[1] | ||
heapSnapshot = sys.argv[2] | ||
|
||
with open(luauDump, "r") as file: | ||
dump = json.load(file) | ||
|
||
snapshot = convertToSnapshot(dump) | ||
|
||
with open(heapSnapshot, "w") as file: | ||
json.dump(snapshot, file) | ||
|
||
print(f"Heap snapshot written to: '{heapSnapshot}'") |