From 04ce196760033b9c6e1f3700bc7611c07ddbd811 Mon Sep 17 00:00:00 2001 From: Vladyslav Ovchynnykov Date: Tue, 14 Sep 2021 12:20:26 +0300 Subject: [PATCH] EntryDiff scoring (#4) * Adds EntryDiff.score calculation * Update .travis.yml * Updates issues * Fixes http tx dict product * Revert "Update .travis.yml" This reverts commit a64a6e1eef8c9d5fe43baf52649ea4ad41fc7f06. * Adds UI * Adds labels coloring * Adds decimalAdjust * Moves score to Comparison * Adds soft diffs into scoring result * Removes TODO Co-authored-by: Andrey Pokhilko --- Dockerfile | 1 + README.md | 2 + harnic-spa/src/App.css | 36 ++++++++++--- harnic-spa/src/components/DiffRecordRow.js | 59 +++++++++++++++++++--- harnic-spa/src/components/ResponseData.js | 8 ++- harnic-spa/src/utils.js | 31 +++++++++++- harnic/compare/entry.py | 55 +++++++++++++++----- harnic/compare/matcher.py | 11 ++-- harnic/compare/schemas.py | 1 + harnic/compare/utils.py | 31 ++++++++++-- harnic/constants.py | 23 +++++++++ harnic/render.py | 3 ++ requirements.txt | 1 + 13 files changed, 226 insertions(+), 36 deletions(-) diff --git a/Dockerfile b/Dockerfile index 979ce5e..ebf052f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,7 @@ ENV SPA_LOCATION=/app/harnic-spa # install dependencies COPY requirements.txt . RUN pip install -r requirements.txt +RUN python -m spacy download en_core_web_sm # install backend COPY harnic harnic diff --git a/README.md b/README.md index 955a272..d26fdff 100644 --- a/README.md +++ b/README.md @@ -64,3 +64,5 @@ docker rm harnic - Add contextual context-wrap (*Hard*) - Add request postData - Handle soft diffs missing or added as soft (*Under question*) +- Http tx scores to 0.5/0.5 +- Add score of entry and body to UI diff --git a/harnic-spa/src/App.css b/harnic-spa/src/App.css index ccdf540..f006517 100644 --- a/harnic-spa/src/App.css +++ b/harnic-spa/src/App.css @@ -24,16 +24,31 @@ background-color: rgba(0, 0, 0, 0.25) } */ -.ui.table td.warning, .ui.table tr.warning { - background: rgba(246, 157, 80, 0.2) !important; +.ui.table td.warning, .ui.table tr.warning, .label.warning { + background: rgba(246, 157, 80, 0.2) !important; } -.ui.table td.positive, .ui.table tr.positive { - background: rgba(70, 149, 74, 0.2) !important; +.ui.table td.positive, .ui.table tr.positive, .label.positive { + background: rgba(70, 149, 74, 0.2) !important; } -.ui.table td.negative, .ui.table tr.negative { - background: rgba(201, 60, 55, 0.2) !important; +.ui.table td.negative, .ui.table tr.negative, .label.negative { + background: rgba(201, 60, 55, 0.2) !important; +} + +.label.warning { + background: rgba(246, 157, 80, 0.55) !important; + color: white !important; +} + +.label.positive { + background: rgba(70, 149, 74, 0.55) !important; + color: white !important; +} + +.label.negative { + background: rgba(201, 60, 55, 0.55) !important; + color: white !important; } /* Some basic formatting */ @@ -180,7 +195,6 @@ code { } .reordering-icon { - float: right; -webkit-transform: rotate(90deg); -moz-transform: rotate(90deg); -ms-transform: rotate(90deg); @@ -188,6 +202,14 @@ code { transform: rotate(90deg); } +.entry-diff.meta-label { + float: right; +} + +.entry-diff.meta-label.reordering { + padding-right: 1px !important; +} + .truncated { text-align: center; font-family: 'Lato', auto; diff --git a/harnic-spa/src/components/DiffRecordRow.js b/harnic-spa/src/components/DiffRecordRow.js index 43a507b..8bf0769 100644 --- a/harnic-spa/src/components/DiffRecordRow.js +++ b/harnic-spa/src/components/DiffRecordRow.js @@ -1,9 +1,9 @@ import { useState } from "react"; -import { Tab, Table, Icon, Label } from "semantic-ui-react"; +import { Tab, Table, Icon, Label, Menu } from "semantic-ui-react"; import RequestData from "./RequestData.js"; import ResponseData from "./ResponseData.js"; -import { truncate } from ".././utils.js"; +import { truncate, getScoreLabelClass, decimalAdjust } from ".././utils.js"; const DiffRecordRow = ({ record }) => { const [isOpen, setIsOpen] = useState(false); @@ -31,7 +31,16 @@ const DiffRecordRow = ({ record }) => { const aPanes = record.pair.a ? [ { - menuItem: "Request", + menuItem: ( + + Request + {record.tag === "diff" && + + } + + ), render: () => ( { ), }, { - menuItem: "Response", + menuItem: ( + + Response + {record.tag === "diff" && + + } + + ), render: () => ( { response={record.pair.a.response} diff={record.diff && record.diff.comparisons.response} initialEntry={true} + score={record.diff && record.diff.score} /> ), @@ -69,7 +88,16 @@ const DiffRecordRow = ({ record }) => { const bPanes = record.pair.b ? [ { - menuItem: "Request", + menuItem: ( + + Request + {record.tag === "diff" && + + } + + ), render: () => ( { ), }, { - menuItem: "Response", + menuItem: ( + + Response + {record.tag === "diff" && + + } + + ), render: () => ( { response={record.pair.b.response} diff={record.diff && record.diff.comparisons.response} initialEntry={false} + score={record.diff && record.diff.score} /> ), @@ -124,8 +162,15 @@ const DiffRecordRow = ({ record }) => { )} {record.is_reordering && ( - + )} + {record.tag === "diff" && + + } {record.pair.b && ( diff --git a/harnic-spa/src/components/ResponseData.js b/harnic-spa/src/components/ResponseData.js index 34f4d6c..4524f32 100644 --- a/harnic-spa/src/components/ResponseData.js +++ b/harnic-spa/src/components/ResponseData.js @@ -11,7 +11,7 @@ import { DateTime } from "luxon"; import regexifyString from "regexify-string"; import ModalScrollingContent from "./ModalScrollingContent.js"; -import { truncate, calculateDiffClass } from ".././utils.js"; +import { truncate, calculateDiffClass, getScoreLabelClass, decimalAdjust } from ".././utils.js"; const ContentText = ({ value, request }) => { if (value === null) { @@ -69,6 +69,7 @@ const ResponseData = ({ response, diff, initialEntry, + score, }) => { const cmpIdx = initialEntry ? 0 : 1; @@ -274,6 +275,11 @@ const ResponseData = ({
Content: + {score && + + }
{Object.entries(response.content).map(([key, value]) => { diff --git a/harnic-spa/src/utils.js b/harnic-spa/src/utils.js index b63e470..f8cda62 100644 --- a/harnic-spa/src/utils.js +++ b/harnic-spa/src/utils.js @@ -20,5 +20,34 @@ const calculateDiffClass = (diff, criteria, key) => { return keyClass; }; - export { truncate, calculateDiffClass }; +const getScoreLabelClass = (score) => { + if (score < 0.25) { + return 'negative'; + } else if (score == 1) { + return 'positive'; + } else { + return 'warning'; + } +}; + +const decimalAdjust = (type, value, exp) => { + // If the exp is undefined or zero... + if (typeof exp === 'undefined' || +exp === 0) { + return Math[type](value); + } + value = +value; + exp = +exp; + // If the value is not a number or the exp is not an integer... + if (isNaN(value) || !(typeof exp === 'number' && exp % 1 === 0)) { + return NaN; + } + // Shift + value = value.toString().split('e'); + value = Math[type](+(value[0] + 'e' + (value[1] ? (+value[1] - exp) : -exp))); + // Shift back + value = value.toString().split('e'); + return +(value[0] + 'e' + (value[1] ? (+value[1] + exp) : exp)); +} + +export { truncate, calculateDiffClass, getScoreLabelClass, decimalAdjust }; \ No newline at end of file diff --git a/harnic/compare/entry.py b/harnic/compare/entry.py index 1c2f969..6b84bf9 100644 --- a/harnic/compare/entry.py +++ b/harnic/compare/entry.py @@ -1,7 +1,7 @@ from functools import partial -from harnic.compare.utils import dict_compare, qp_compare, scalars_compare, content_compare -from harnic.constants import SOFT_HEADER_KEYS +from harnic.compare.utils import content_compare, dict_compare, dict_product, qp_compare, scalars_compare +from harnic.constants import SCORE_COEFS, SCORE_HTTP_TX_TYPE_COEFS, SOFT_HEADER_KEYS headers_compare = partial(dict_compare, exceptions=SOFT_HEADER_KEYS, exculde_values=True) @@ -13,25 +13,52 @@ def __init__(self, a, b): self.a = a self.b = b self.equal = None + self.score = {} self.comparisons = self._get_diff() def _get_diff(self): # method and url are not handled here as long as they are part of the entry hash comparisons = {'request': {}, 'response': {}} - comparisons['request']['bodySize'] = scalars_compare(self.a.request['bodySize'], - self.b.request['bodySize']) - comparisons['request']['query_params'] = qp_compare(self.a.request['url'].query_params, - self.b.request['url'].query_params) - comparisons['request']['headers'] = headers_compare(self.a.request['headers'], - self.b.request['headers']) + diff_score = { + 'request': {}, + 'response': {}, + } - comparisons['response']['status'] = scalars_compare(self.a.response['status'], - self.b.response['status']) - comparisons['response']['headers'] = headers_compare(self.a.response['headers'], - self.b.response['headers']) - comparisons['response']['content'] = content_compare(self.a.response, - self.b.response) + cmp = scalars_compare(self.a.request['url'].url, self.b.request['url'].url) + diff_score['request']['url'] = cmp.score + + cmp = scalars_compare(self.a.request['bodySize'], self.b.request['bodySize']) + comparisons['request']['bodySize'] = cmp + + cmp = qp_compare(self.a.request['url'].query_params, self.b.request['url'].query_params) + comparisons['request']['query_params'], diff_score['request']['query_params'] = cmp, cmp.score + + cmp = headers_compare(self.a.request['headers'], self.b.request['headers']) + comparisons['request']['headers'], diff_score['request']['headers'] = cmp, cmp.score + + # TODO: implement postData cmp + diff_score['request']['postData'] = 1 # Treat same for now + + cmp = scalars_compare(self.a.response['status'], self.b.response['status']) + comparisons['response']['status'], diff_score['response']['status'] = cmp, cmp.score + + cmp = headers_compare(self.a.response['headers'], self.b.response['headers']) + comparisons['response']['headers'], diff_score['response']['headers'] = cmp, cmp.score + + cmp = content_compare(self.a.response, self.b.response) + comparisons['response']['content'], diff_score['response']['content'] = cmp, cmp.score self.equal = all(all(cmp.equal for cmp in criteria.values()) for criteria in comparisons.values()) + + self.score['full'] = diff_score + diff_score_with_coefs = { + 'request': sum(dict_product(diff_score['request'], SCORE_COEFS['request']).values()), + 'response': sum(dict_product(diff_score['response'], SCORE_COEFS['response']).values()), + } + self.score['by_http_tx_type'] = diff_score_with_coefs + diff_score_with_coefs = dict_product(diff_score_with_coefs, SCORE_HTTP_TX_TYPE_COEFS) + final_score = sum(diff_score_with_coefs.values()) + self.score['final'] = final_score + return comparisons diff --git a/harnic/compare/matcher.py b/harnic/compare/matcher.py index c0b2b49..c67f23d 100644 --- a/harnic/compare/matcher.py +++ b/harnic/compare/matcher.py @@ -64,6 +64,7 @@ def _build_files_diff(opcodes, file1, file2): PermTag.DIFF: 0, PermTag.INSERT: 0, PermTag.DELETE: 0, + '_diff_scores_sum': 0, } perms_total = _calculate_permutations_total_number(opcodes) with tqdm(total=perms_total, desc='Constructing diff records') as pbar: @@ -77,6 +78,8 @@ def _build_files_diff(opcodes, file1, file2): dr = DiffRecord(pair, entry_diff, tag_selector) records.append(dr) stats[tag_selector] += 1 + if tag_selector == PermTag.DIFF: + stats['_diff_scores_sum'] += entry_diff.score['final'] pbar.update() elif tag == 'replace': for i in range(i1, i2): @@ -101,7 +104,8 @@ def _build_files_diff(opcodes, file1, file2): records.append(dr) stats[PermTag.INSERT] += 1 pbar.update() - stats['ratio'] = 2.0 * stats[PermTag.EQUAL] / (len(file1) + len(file2)) + + stats['ratio'] = 2.0 * (stats[PermTag.EQUAL] + stats['_diff_scores_sum']) / (len(file1) + len(file2)) reorders = _calculate_reorders(records) reorders_stats = _calculate_reorders_stats(reorders, stats, (file1, file2)) @@ -109,7 +113,6 @@ def _build_files_diff(opcodes, file1, file2): 'with_reorders': reorders_stats, 'strict_order': stats, } - return records, reorders, stats @@ -162,7 +165,9 @@ def _calculate_reorders_stats(reorders, stats, files): for reorder in reorders: tag_selector = PermTag.EQUAL if reorder['entry_diff'].equal else PermTag.DIFF stats[tag_selector] += 1 - stats['ratio'] = 2.0 * stats[PermTag.EQUAL] / (len(file1) + len(file2)) + if tag_selector == PermTag.DIFF: + stats['_diff_scores_sum'] += reorder['entry_diff'].score['final'] + stats['ratio'] = 2.0 * (stats[PermTag.EQUAL] + stats['_diff_scores_sum']) / (len(file1) + len(file2)) return stats diff --git a/harnic/compare/schemas.py b/harnic/compare/schemas.py index 35dfd7a..7c969d2 100644 --- a/harnic/compare/schemas.py +++ b/harnic/compare/schemas.py @@ -19,6 +19,7 @@ class ComparisonSchema(Schema): class EntryDiffSchema(Schema): equal = fields.Bool() comparisons = fields.Dict(values=fields.Dict(values=fields.Nested('ComparisonSchema'))) + score = fields.Dict() class PairSchema(Schema): diff --git a/harnic/compare/utils.py b/harnic/compare/utils.py index 022beed..c96b1b7 100644 --- a/harnic/compare/utils.py +++ b/harnic/compare/utils.py @@ -1,11 +1,16 @@ from difflib import _mdiff +import spacy + +nlp = spacy.load('en_core_web_sm') + class Comparison: - def __init__(self, equal, strict_equal, diff): + def __init__(self, equal, strict_equal, diff, score): self.equal = equal self.strict_equal = strict_equal self.diff = diff + self.score = score class DictDiff: @@ -29,6 +34,10 @@ def split_diff(fromlines, tolines, **kwargs): return fromlist, tolist, flaglist +def dict_product(d1, d2): + return {key: d1.get(key, 0) * d2[key] for key in d2.keys()} + + def dict_compare(d1, d2, exceptions=(), exculde_values=False): d1_keys = set(d1.keys()) d2_keys = set(d2.keys()) @@ -46,12 +55,18 @@ def dict_compare(d1, d2, exceptions=(), exculde_values=False): else: equal = {k: v for k, v in d1.items() if k not in exceptions} == \ {k: v for k, v in d2.items() if k not in exceptions} - return Comparison(equal, d1 == d2, DictDiff(added, removed, modified, same)) + + try: + total_soft_diffs = len([v for v in modified.values() if v[2]]) + score = 2.0 * (len(same) + total_soft_diffs) / (len(d1_keys) + len(d2_keys)) + except ZeroDivisionError: + score = 1 + return Comparison(equal, d1 == d2, DictDiff(added, removed, modified, same), score) def scalars_compare(s1, s2): equal = strict_equal = s1 == s2 - return Comparison(equal, strict_equal, None) + return Comparison(equal, strict_equal, None, int(equal)) def qp_compare(qp1, qp2): @@ -61,6 +76,13 @@ def qp_compare(qp1, qp2): return cmp +def text_compare(t1, t2): + doc1 = nlp(t1) + doc2 = nlp(t2) + score = doc2.similarity(doc1) + return score + + def content_compare(r1, r2): c1, raw1 = r1['content'], r1.get('raw_body') c2, raw2 = r2['content'], r2.get('raw_body') @@ -70,16 +92,19 @@ def content_compare(r1, r2): cmp = dict_compare(c1, c2, exceptions=keys) text_modified = cmp.diff.modified.get('text', ()) + cmp.score = 1 if text_modified and None not in text_modified: try: diff = split_diff(c1['text'].splitlines(), c2['text'].splitlines()) except KeyError: pass else: + cmp.score = text_compare(c1['text'], c2['text']) cmp.diff.modified['text'] = diff else: # We should check for raw equality in case cleaned failed if raw1 != raw2: + cmp.score = text_compare(c1['text'], c2['text']) # Values are skipped anyway so we just mark a diff without inner explanation cmp.diff.modified['text'] = None diff --git a/harnic/constants.py b/harnic/constants.py index 819a88d..e6f4db1 100644 --- a/harnic/constants.py +++ b/harnic/constants.py @@ -41,3 +41,26 @@ PARTIAL_MATCH_CUTOFF = 0.5 FANCY_REPLACE_THRESHOLD_LEN = 256 + +SCORE_COEFS = { + 'request': { + 'url': 0.1, + 'query_params': 0.3, + 'headers': 0.2, + 'postData': 0.4, + }, + 'response': { + 'status': 0.25, + 'headers': 0.3, + 'content': 0.45, + } +} + +SCORE_HTTP_TX_TYPE_COEFS = { + 'request': 0.4, + 'response': 0.6, +} + +assert sum(SCORE_COEFS['request'].values()) == 1 +assert sum(SCORE_COEFS['response'].values()) == 1 +assert sum(SCORE_HTTP_TX_TYPE_COEFS.values()) == 1 diff --git a/harnic/render.py b/harnic/render.py index a4436a2..b04c11a 100644 --- a/harnic/render.py +++ b/harnic/render.py @@ -35,3 +35,6 @@ def render_diff_to_json(diff, format='compact'): file_js.write('window.globalData = ') file_js.writelines(l for l in file_json) file_js.write(';') + + from harnic.helpers import stats_report + print(stats_report(diff)) diff --git a/requirements.txt b/requirements.txt index ac38295..02c3518 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ six==1.16.0 tabulate==0.8.9 tqdm==4.62.1 termcolor==1.1.0 +spacy==3.1.2