-
Notifications
You must be signed in to change notification settings - Fork 162
/
json-cleanup-for-pdf.py
161 lines (129 loc) · 6.39 KB
/
json-cleanup-for-pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import sys, json, re, os
# usage: json-cleanup-for-pdf.py <int>
# if int>0, disable PDF mode (only do WWW cleanup, note metadata.name still needs to be cleaned up manually)
# disableWrites = True # debugging
def parse_pdf_mode(argv):
    """Decide between PDF and WWW cleanup from the command line.

    argv: the full argument vector; argv[0] is the script name and the
        optional argv[1] is the mode switch.

    Returns True for PDF mode (no argument, or an integer <= 0), False for
    WWW mode (an integer > 0), and None when argv[1] is not an integer, so
    that the caller can report the mistake instead of dying with a raw
    traceback from int().
    """
    if len(argv) <= 1:
        return True
    try:
        return int(argv[1]) <= 0
    except ValueError:
        return None


# PDF mode is the default; it is only switched off from the command line.
pdfMode = True
if __name__ == "__main__":
    print(format(sys.argv))
    pdfMode = parse_pdf_mode(sys.argv)
    if pdfMode is None:
        print("Error: expected an integer argument, got "+format(sys.argv[1]))
        sys.exit(2)
    if not pdfMode:
        print("WWW mode on")
# Notebooks to clean up, grouped by the framework each one uses.
# For debugging, temporarily cut this down to a single entry, e.g.
# [ "physgrad-code.ipynb" ] or [ "diffphys-code-sol.ipynb" ].
fileList = [
    # TF
    "diffphys-code-burgers.ipynb",
    "diffphys-code-ns.ipynb",
    "diffphys-code-sol.ipynb",
    "physicalloss-code.ipynb",
    # pytorch
    "bayesian-code.ipynb",
    "supervised-airfoils.ipynb",
    # phiflow
    "reinflearn-code.ipynb",
    # jax
    "physgrad-comparison.ipynb",
    # pip
    "physgrad-code.ipynb",
]
# main

# Stream-output lines matching any of these patterns are dropped (TF / pytorch
# / pip noise). Largely redundant with removing whole stderr outputs, kept as
# a double check. Compiled once here instead of once per notebook.
NOISE_PATTERNS = [
    re.compile(r"WARNING:tensorflow:"),
    re.compile(r"UserWarning:"),
    re.compile(r"DeprecationWarning:"),
    re.compile(r"InsecureRequestWarning"),  # for https download
    re.compile(r"Building wheel"),  # phiflow install, also gives weird unicode characters
    re.compile(r"warnings.warn"),  # phiflow warnings
    re.compile(r"WARNING:absl"),  # jax warnings
    re.compile(r"ERROR: pip"),  # pip dependencies
    re.compile(r"requires imgaug"),  # pip dependencies
    re.compile(r"See the documentation of nn.Upsample"),  # pip dependencies
]

# Long literal data line (only present in the burgers notebook) and the
# shortened text that replaces it in PDF mode.
LONG_DATA_RE = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]" )
LONG_DATA_SHORT = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"


def backup_name(fn_out):
    """Return the first unused backup path "<stem>.bak<N>" (N = 0, 1, ...).

    A warning is printed once for every candidate that already exists.
    """
    stem = os.path.splitext(fn_out)[0] + ".bak"
    cnt = 0
    fn = stem + format(cnt)
    while os.path.isfile(fn):
        print("Warning: "+fn+" already exists!")
        cnt += 1
        fn = stem + format(cnt)
    return fn


def is_noise(line):
    """Return True if the line matches any entry of NOISE_PATTERNS."""
    return any(pattern.search(line) for pattern in NOISE_PATTERNS)


def strip_noise_lines(text):
    """Drop noisy lines from the text of a stream output.

    text: a list of lines, or a single multi-line string (the notebook
        format permits both).

    Returns (cleaned_text, removed_count); cleaned_text has the same type
    as the input.
    """
    if isinstance(text, str):
        lines = text.splitlines(keepends=True)
        kept = [line for line in lines if not is_noise(line)]
        return "".join(kept), len(lines) - len(kept)
    kept = [line for line in text if not is_noise(line)]
    return kept, len(text) - len(kept)


def shorten_data_lines(cell):
    """Replace the long data literal in a code cell's source, in place.

    Returns the number of replacements actually made, so that a cell
    without the literal does not count as a change.
    """
    source = cell["source"]
    if isinstance(source, str):
        cell["source"], count = LONG_DATA_RE.subn(LONG_DATA_SHORT, source)
        return count
    total = 0
    for j, line in enumerate(source):
        source[j], count = LONG_DATA_RE.subn(LONG_DATA_SHORT, line)
        total += count
    return total


def clean_notebook(nb, pdf_mode):
    """Clean the code cells of a parsed notebook in place.

    nb: the notebook as loaded by json.load (a dict with a "cells" list).
    pdf_mode: when true, additionally shorten the long data literal in the
        cell sources.

    Whole stderr stream outputs are removed, and noisy lines are removed
    from the remaining stream outputs. Returns the number of changes made;
    0 means the notebook was left untouched.
    """
    changes = 0
    for cell in nb["cells"]:
        if cell["cell_type"] != "code":
            continue
        if pdf_mode:
            changes += shorten_data_lines(cell)

        kept_outputs = []
        for j, output in enumerate(cell["outputs"]):
            if output["output_type"] == "stream":
                if output["name"] == "stderr":
                    print("stderr found! len text "+ format(len( output["text"]) ) +", removing entry "+format(j) )
                    changes += 1
                    continue  # drop the whole stderr entry
                output["text"], removed = strip_noise_lines(output["text"])
                changes += removed
            kept_outputs.append(output)
        cell["outputs"] = kept_outputs
    return changes


def process_notebook(fn_out, pdf_mode):
    """Back up one notebook file, clean it, and write the result.

    The original is first renamed to a fresh backup name. If cleaning
    changes nothing, the backup is renamed back so the file stays
    byte-identical; otherwise the cleaned notebook is written under the
    original name and the backup is kept.

    Exits with status 1 if the file is missing, cannot be parsed, or both
    the backup and the original unexpectedly exist.
    """
    if not os.path.isfile(fn_out):
        print("Error: "+fn_out+" not found!")
        sys.exit(1)

    # create backups
    fn = backup_name(fn_out)
    print("renaming "+fn_out+ " to "+fn )
    os.rename(fn_out, fn)
    if not os.path.isfile(fn):
        print("Error: "+fn+" missing!")
        sys.exit(1)

    try:
        # notebooks are UTF-8; do not depend on the platform default encoding
        with open(fn, encoding="utf-8") as file:
            nb = json.load(file)
    except ValueError:
        # covers both invalid JSON and undecodable bytes; put the original
        # back so the notebook does not vanish under its own name
        os.rename(fn, fn_out)
        print("Error: "+fn_out+" is not valid JSON!")
        sys.exit(1)

    if clean_notebook(nb, pdf_mode) == 0:
        print("Warning: Nothing found in "+fn+"!")
        if not os.path.isfile(fn_out):
            os.rename(fn, fn_out)
        else:
            print("Error, both files exist!?")
            sys.exit(1)
    else:
        print(" ... writing "+fn_out )
        with open(fn_out, 'w', encoding="utf-8") as file_out:
            json.dump(nb, file_out, indent=1, sort_keys=True)


def main():
    """Clean every notebook in fileList using the mode chosen on the command line."""
    for fn_out in fileList:
        process_notebook(fn_out, pdfMode)


if __name__ == "__main__":
    main()