Skip to content

Commit

Permalink
update export json template (#103)
Browse files Browse the repository at this point in the history
  • Loading branch information
feifei-111 authored Feb 20, 2024
1 parent a3b6d3e commit 6d6ae08
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 14 deletions.
12 changes: 8 additions & 4 deletions padiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@

from .report.hooks import info_hook
from .datas import global_json_laoder as jsons
from . import cinn_diff

# cinn_diff is an optional feature with heavy/optional dependencies; if it
# cannot be imported, degrade gracefully instead of failing all of padiff.
try:
    from . import cinn_diff
except Exception:  # narrowed from bare `except:` so SystemExit/KeyboardInterrupt still propagate
    pass


def module_filter(name):
Expand Down Expand Up @@ -134,7 +138,7 @@ def __init__(self, method):
self.__api__ = True

def forward(self, *args, **kwargs):
return self._method(tensor_obj, *args, **kwargs)
return self._method(*args, **kwargs)

def __str__(self):
return self.__name__
Expand All @@ -152,7 +156,7 @@ def __init__(self, method):
self.__api__ = True

def forward(self, *args, **kwargs):
return self._method(tensor_obj, *args, **kwargs)
return self._method(*args, **kwargs)

def __str__(self):
return self.__name__
Expand All @@ -163,7 +167,7 @@ def __str__(self):
else:
raise RuntimeError("Required module_type is in [paddle, torch], but received {}".format(method_fullname))

out = layer(*args, **kwargs)
out = layer(tensor_obj, *args, **kwargs)

handle.remove()

Expand Down
6 changes: 3 additions & 3 deletions padiff/checker/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ def __call__(self, file_list_0, file_list_1, cfg):
assert len(file_list_0) == len(
file_list_1
), f"number of tensors for compare is not equal, {len(file_list_0)} vs {len(file_list_1)}"
for path_0, path_1 in zip(file_list_0, file_list_1):
tensor_0 = load_numpy(path_0)
tensor_1 = load_numpy(path_1)
for info_0, info_1 in zip(file_list_0, file_list_1):
tensor_0 = load_numpy(info_0["path"])
tensor_1 = load_numpy(info_1["path"])
if tensor_0.size == 0 or tensor_1.size == 0:
if tensor_0.size != tensor_1.size:
raise RuntimeError("size of tensors is not equal")
Expand Down
24 changes: 21 additions & 3 deletions padiff/dump_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import json
import os, sys
import numpy
from .utils import Counter, reset_dir
import paddle
from .utils import Counter, frames_to_string, reset_dir


dump_root_path = os.path.join(sys.path[0], "padiff_dump")
Expand Down Expand Up @@ -79,14 +80,31 @@ def dump_report_node(wrap_node, tensor_dumper):
"net_id": wrap_node.fwd_report.net_id,
},
"children": [],
"stack": frames_to_string(wrap_node.fwd_report.frames),
}
for tensor in wrap_node.fwd_report.tensors_for_compare():
file_name = tensor_dumper(tensor.detach().numpy())
node_info["fwd_outputs"].append(file_name)
node_info["fwd_outputs"].append(
{
"path": file_name,
"shape": str(tensor.shape),
"dtype": str(tensor.dtype),
"place": str(tensor.place) if isinstance(tensor, paddle.Tensor) else str(tensor.device),
"layout": str(tensor.layout),
}
)

for tensor in wrap_node.bwd_report.tensors_for_compare():
file_name = tensor_dumper(tensor.detach().numpy())
node_info["bwd_grads"].append(file_name)
node_info["bwd_grads"].append(
{
"path": file_name,
"shape": str(tensor.shape),
"dtype": str(tensor.dtype),
"place": str(tensor.place) if isinstance(tensor, paddle.Tensor) else str(tensor.device),
"layout": str(tensor.layout),
}
)

for child in wrap_node.children:
child_info = dump_report_node(child, tensor_dumper)
Expand Down
7 changes: 5 additions & 2 deletions padiff/report/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
map_structure_and_replace_key,
flatten,
for_each_grad_tensor,
extract_frame_summary,
)
import json
import numpy
Expand Down Expand Up @@ -114,10 +115,12 @@ def info_hook(model, input, output, net_id):
else:
_model = model

_, frames = extract_frame_summary()

new_in = clone_tensors(input)
new_out = clone_tensors(output)
fwd_item = report.put_item("forward", new_in, new_out, _model, net_id)
bwd_item = report.put_item("backward", new_in, new_out, _model, net_id)
fwd_item = report.put_item("forward", new_in, new_out, _model, net_id, frames)
bwd_item = report.put_item("backward", new_in, new_out, _model, net_id, frames)
bwd_item.set_forward(fwd_item)

report.stack.push_api(_model, fwd_item, bwd_item)
Expand Down
6 changes: 4 additions & 2 deletions padiff/report/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def __init__(self, marker):
self.marker = marker
self.stack = LayerStack()

def put_item(self, type_, input_, output, net, net_id):
def put_item(self, type_, input_, output, net, net_id, frames):
step = self.counter.get_id()
self.items.append(
ReportItem(
Expand All @@ -35,6 +35,7 @@ def put_item(self, type_, input_, output, net, net_id):
output=output,
net=net,
net_id=net_id, # traversal order of sublayers
frames=frames,
)
)
return self.items[-1]
Expand All @@ -49,7 +50,7 @@ def __str__(self):


class ReportItem:
def __init__(self, type_, step, input_, output, net, net_id):
def __init__(self, type_, step, input_, output, net, net_id, frames):
assert type_ in [
"forward",
"backward",
Expand All @@ -65,6 +66,7 @@ def __init__(self, type_, step, input_, output, net, net_id):
self.fwd_item = None # bound to another reportitem, if self.type is "backward"
self.bwd_item = None # bound to another reportitem, if self.type is "forward"
self.input_grads = self._gen_input_grads()
self.frames = frames

def set_forward(self, fwd):
assert self.type == "backward", "can't set forward for non-backward item."
Expand Down
54 changes: 54 additions & 0 deletions padiff/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,57 @@ def get_id(self):
ret = self.id
self.id += 1
return ret


"""
tools for recording frame stack
"""


import os.path as osp
import traceback


def _is_system_package(filename):
    """Return True if *filename* lives inside a system/framework package.

    Used while walking the call stack to skip frames from python's own lib
    directories, /usr/local, paddle, torch and padiff itself, so that only
    user-code frames are reported.
    """
    excluded_roots = (
        "lib/python",
        "/usr/local",
        osp.dirname(paddle.__file__),
        osp.dirname(torch.__file__),
        osp.dirname(__file__),  # exclude padiff
    )
    return any(root in filename for root in excluded_roots)


def extract_frame_summary():
    """Extract the current call stack via the traceback module.

    Gathers the call information so it can be stored in a ReportItem and
    help locate errors later. Each frame summary exposes:
        line: source text of the code line
        lineno: line number within the file
        filename: file name of the frame
        name: the function name

    Returns:
        tuple: ``(last_user_fs, frame_summarys)`` — the innermost frame that
        is NOT inside a system/framework package (see ``_is_system_package``),
        and the full stack summary.

    Raises:
        AssertionError: if every frame belongs to a system package.
    """
    frame_summarys = traceback.StackSummary.extract(traceback.walk_stack(None))
    last_user_fs = None
    for fs in frame_summarys:
        # keep the first (innermost) frame that comes from user code
        if not _is_system_package(fs.filename):
            last_user_fs = fs
            break
    # fixed typo in the message ("happend" -> "happened")
    assert last_user_fs is not None, "Error happened, can't return None."
    return last_user_fs, frame_summarys


def frames_to_string(frames, indent=0):
    """Render frame summaries as a readable multi-line stack string.

    Args:
        frames: iterable of frame summaries (objects exposing ``filename``,
            ``lineno``, ``name`` and ``line`` attributes).
        indent: number of leading spaces before each "File ..." line; the
            source line beneath it is indented twice as much.

    Returns:
        str: the formatted stack, one "File ...\\n<source line>" entry per
        frame, joined by newlines ("" for an empty iterable).
    """
    pad = " " * indent  # separate name: don't rebind/shadow the `indent` parameter
    return "\n".join(
        "{}File {}: {} {}\n{}{}{}".format(pad, f.filename, f.lineno, f.name, pad, pad, f.line)
        for f in frames
    )
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ pytest-cov
regex
pytest-xdist
torchvision
graphviz

0 comments on commit 6d6ae08

Please sign in to comment.