-
Notifications
You must be signed in to change notification settings - Fork 2
/
process_warnings.py
246 lines (219 loc) · 9.33 KB
/
process_warnings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
"""
Script to process pytest warnings output by pytest-json-report plugin and output it as a html
"""
from __future__ import absolute_import
from __future__ import print_function
import json
import os
import io
import re
import argparse
from collections import Counter
import pandas as pd
from write_to_html import (
HtmlOutlineWriter,
) # noqa pylint: disable=import-error,useless-suppression
# Column layout shared by every warning record in this module.  Each warning
# is represented as a plain list whose positions follow this ordering;
# columns_index_dict maps a column name to its position in that list.
columns = [
    "message",
    "category",
    "filename",
    "lineno",
    "high_location",
    "label",
    "num",
    "deprecated",
]
columns_index_dict = dict(zip(columns, range(len(columns))))
def seperate_warnings_by_location(warnings_data):
    """
    Assign a high-level location label to each warning record.

    Warnings originate from multiple locations; this function takes a list of
    warning records (lists laid out per ``columns``) and fills each record's
    "high_location" slot based on which known path pattern its "filename"
    matches.  Records that match no pattern are labeled "other".

    NOTE: the function name keeps its historical spelling ("seperate") so
    existing callers are unaffected.

    Returns the same list, mutated in place.
    """
    # Raw strings make the regex escapes explicit (no pylint W1401 needed),
    # and \d+ (rather than \d) also matches double-digit minor versions such
    # as python3.10.
    warnings_locations = {
        r".*/python\d+\.\d+/site-packages/.*\.py": "python",
        r".*/edx-platform/lms/.*\.py": "lms",
        r".*/edx-platform/openedx/.*\.py": "openedx",
        r".*/edx-platform/cms/.*\.py": "cms",
        r".*/edx-platform/common/.*\.py": "common",
    }
    # For each record: first pattern that matches its filename wins; fall
    # back to "other" when nothing matches.
    for warnings_object in warnings_data:
        warning_origin_located = False
        for pattern, location in warnings_locations.items():
            if re.search(pattern, warnings_object[columns_index_dict["filename"]]):
                warnings_object[columns_index_dict["high_location"]] = location
                warning_origin_located = True
                break
        if not warning_origin_located:
            warnings_object[columns_index_dict["high_location"]] = "other"
    return warnings_data
def convert_warning_dict_to_list(warning_dict):
    """
    Flatten a warning dict into a list ordered per ``columns``.

    Keys missing from the dict become None; the "num" slot is always
    initialised to 1 so each record starts with an occurrence count of one.
    """
    # dict.get returns None for absent keys, matching the column-by-column
    # presence check this replaces.
    output = [warning_dict.get(column) for column in columns]
    output[columns_index_dict["num"]] = 1
    return output
def read_warning_data(dir_path):
    """
    Aggregate pytest warnings from every warnings JSON file in ``dir_path``.

    During test runs in jenkins, multiple warning json files are output.  This
    function finds every file whose name matches the known
    ``pytest_warnings*.json`` pattern and aggregates their warnings into one
    large list of records (see ``columns`` for the record layout).

    Files without a top-level "warnings" key are skipped; their names are
    printed for visibility.
    """
    dir_path = os.path.expanduser(dir_path)
    # find all files that exist in given directory
    files_in_dir = [
        f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f))
    ]
    # TODO(jinder): currently this is hard-coded in, maybe create a constants file with info
    # THINK(jinder): but creating file for one constant seems overkill
    warnings_file_name_regex = re.compile(r"pytest_warnings_?\d*\.json")
    # keep only the files matching our known file name pattern
    warnings_files = [
        temp_file
        for temp_file in files_in_dir
        if warnings_file_name_regex.search(temp_file) is not None
    ]
    # go through each warning file and aggregate warnings into warnings_data
    warnings_data = []
    for temp_file in warnings_files:
        # os.path.join is portable and avoids manual "/" concatenation;
        # dir_path was already expanded above, so no second expanduser.
        with io.open(os.path.join(dir_path, temp_file), "r") as read_file:
            json_input = json.load(read_file)
            if "warnings" in json_input:
                warnings_data.extend(
                    convert_warning_dict_to_list(warning_dict)
                    for warning_dict in json_input["warnings"]
                )
            else:
                print(temp_file)
    return warnings_data
def compress_similar_warnings(warnings_data):
    """
    Deduplicate identical warnings, recording how often each occurred.

    Exactly-equal records are counted, and one representative list per unique
    warning is returned with its "num" slot set to that count.
    """
    occurrence_counts = Counter(tuple(record) for record in warnings_data)
    compressed = []
    for unique_record, count in occurrence_counts.items():
        record = list(unique_record)
        record[columns_index_dict["num"]] = count
        compressed.append(record)
    return compressed
def process_warnings_json(dir_path):
    """
    Master function to process through all warnings and output a list of
    deduplicated warning records (lists laid out per ``columns``).

    flow:
    - Aggregate data from all warning files in ``dir_path``
    - Flag each warning as deprecated vs non-deprecated (message contains the
      word "deprecated"; matched case-insensitively so "Deprecated" and
      "DEPRECATED" are caught as well)
    - Categorize warnings by originating location
    - Compress identical warnings into single counted records

    Possible Error/enhancement: there might be better ways to separate
    deprecated vs non-deprecated warnings
    """
    warnings_data = read_warning_data(dir_path)
    for warnings_object in warnings_data:
        message = warnings_object[columns_index_dict["message"]]
        # message is None when the source dict lacked a "message" key
        # (convert_warning_dict_to_list fills missing columns with None), so
        # guard before substring matching to avoid a TypeError.
        warnings_object[columns_index_dict["deprecated"]] = bool(
            message and "deprecated" in message.lower()
        )
    warnings_data = seperate_warnings_by_location(warnings_data)
    compressed_warnings_data = compress_similar_warnings(warnings_data)
    return compressed_warnings_data
def group_and_sort_by_sumof(dataframe, group, sort_by):
    """
    Group ``dataframe`` by column ``group`` and order the groups by the sum
    of their ``sort_by`` column, largest sum first.

    Returns a list of ``(group_key, group_dataframe, sum)`` tuples.
    """
    summed_groups = [
        (key, grouped, grouped[sort_by].sum())
        for key, grouped in dataframe.groupby(group)
    ]
    # descending by the precomputed sum
    summed_groups.sort(key=lambda entry: entry[2], reverse=True)
    return summed_groups
def write_html_report(warnings_dataframe, html_path):
    """
    Render the warnings dataframe as a nested html outline.

    Sections nest category -> high-level location -> warning message ->
    individual warning (filename, lineno, occurrence count), each level
    sorted by descending total occurrence count.  Parent directories of
    ``html_path`` are created if needed.
    """
    html_path = os.path.expanduser(html_path)
    # os.path.dirname handles path splitting portably (replaces the manual
    # rfind("/") slicing); empty dirname means the file goes in the cwd.
    dir_path = os.path.dirname(html_path)
    if dir_path:
        os.makedirs(dir_path, exist_ok=True)
    with io.open(html_path, "w") as fout:
        html_writer = HtmlOutlineWriter(fout)
        category_sorted_by_count = group_and_sort_by_sumof(
            warnings_dataframe, "category", "num"
        )
        for category, group_in_category, category_count in category_sorted_by_count:
            # xss-lint: disable=python-wrap-html
            html = u'<span class="count">{category}, count: {count}</span> '.format(
                category=category, count=category_count
            )
            html_writer.start_section(html, klass=u"category")
            locations_sorted_by_count = group_and_sort_by_sumof(
                group_in_category, "high_location", "num"
            )
            for (
                location,
                group_in_location,
                location_count,
            ) in locations_sorted_by_count:
                # xss-lint: disable=python-wrap-html
                html = u'<span class="count">{location}, count: {count}</span> '.format(
                    location=location, count=location_count
                )
                html_writer.start_section(html, klass=u"location")
                message_group_sorted_by_count = group_and_sort_by_sumof(
                    group_in_location, "message", "num"
                )
                for (
                    message,
                    message_group,
                    message_count,
                ) in message_group_sorted_by_count:
                    # xss-lint: disable=python-wrap-html
                    html = u'<span class="count">{warning_text}, count: {count}</span> '.format(
                        warning_text=message, count=message_count
                    )
                    html_writer.start_section(html, klass=u"warning_text")
                    # one leaf section per individual warning row
                    for _, warning in message_group.iterrows():
                        # xss-lint: disable=python-wrap-html
                        html = u'<span class="count">{warning_file_path}</span> '.format(
                            warning_file_path=warning["filename"]
                        )
                        html_writer.start_section(html, klass=u"warning")
                        # xss-lint: disable=python-wrap-html
                        html = u'<p class="lineno">lineno: {lineno}</p> '.format(
                            lineno=warning["lineno"]
                        )
                        html_writer.write(html)
                        # xss-lint: disable=python-wrap-html
                        html = u'<p class="num">num_occur: {num}</p> '.format(
                            num=warning["num"]
                        )
                        html_writer.write(html)
                        html_writer.end_section()
                    html_writer.end_section()
                html_writer.end_section()
            html_writer.end_section()
if __name__ == "__main__":
    # Command-line entry point: aggregate the warning JSON files, build a
    # dataframe with the shared column layout, and emit the html report.
    arg_parser = argparse.ArgumentParser(
        description="Process and categorize pytest warnings and output html report."
    )
    arg_parser.add_argument("--dir-path", default="test_root/log")
    arg_parser.add_argument("--html-path", default="test_html.html")
    parsed_args = arg_parser.parse_args()
    warnings_output = process_warnings_json(parsed_args.dir_path)
    warnings_dataframe = pd.DataFrame(data=warnings_output, columns=columns)
    write_html_report(warnings_dataframe, parsed_args.html_path)