diff --git a/SLTev/ASReval.py b/SLTev/ASReval.py
index 6831f2b..6011e0a 100644
--- a/SLTev/ASReval.py
+++ b/SLTev/ASReval.py
@@ -5,6 +5,7 @@
 from utilities import check_empty_line, osst_checking, pipeline_input, get_SLTev_home_path
 from utilities import check_time_stamp_candiates_format, submission_argument, parity_checking_between_ostt_reference
 from evaluator import normal_timestamp_evaluation, simple_timestamp_evaluation
+from evaluator import normal_evaluation_without_parity
 from ASRev import simple_asr_evaluation, normal_asr_evaluation
 
 
@@ -57,21 +58,24 @@ def main(input_files=[], file_formats=[], simple="False"):
             continue
         if osst_checking(gold_files["ostt"][0]) == 1:  # OStt checking
            continue
+
+
+        inputs_object = {
+            'ostt': gold_files["ostt"][0],
+            'references': gold_files["ost"],
+            'SLTev_home': SLTev_home,
+            'candidate': candidate_file[0]
+        }
+
+        _ = check_time_stamp_candiates_format(candidate_file[0])  # submission checking
+
 
         parity_state, error = parity_checking_between_ostt_reference(gold_files["ostt"][0], gold_files["ost"])
         if parity_state == 0:
-            eprint(
-                "Evaulation for ",
-                candidate_file[0],
-                " failed, the number of Complete lines (C) in ",
-                gold_files["ostt"][0],
-                " and ",
-                " ".join(gold_files["ost"]),
-                " are not equal",
-            )
             eprint(error)
+            normal_evaluation_without_parity(inputs_object)
             continue
-        _ = check_time_stamp_candiates_format(candidate_file[0])  # submission checking
+
         print(
             "Evaluating the file ",
             candidate_file[0],
@@ -79,12 +83,6 @@ def main(input_files=[], file_formats=[], simple="False"):
             " ".join(gold_files["ost"]),
         )
 
-        inputs_object = {
-            'ostt': gold_files["ostt"][0],
-            'references': gold_files["ost"],
-            'SLTev_home': SLTev_home,
-            'candidate': candidate_file[0]
-        }
         if simple == "False":
             normal_timestamp_evaluation(inputs_object)
         else:
diff --git a/SLTev/SLTeval.py b/SLTev/SLTeval.py
index 8ad69d5..e134d6f 100644
--- a/SLTev/SLTeval.py
+++ b/SLTev/SLTeval.py
@@ -4,7 +4,7 @@
 from utilities import check_empty_line, osst_checking, pipeline_input, get_SLTev_home_path
 from utilities import check_time_stamp_candiates_format, submission_argument, parity_checking_between_ostt_reference
 from evaluator import normal_timestamp_evaluation, simple_timestamp_evaluation
-
+from evaluator import normal_evaluation_without_parity
 
 def reference_checking(gold_files):
     check_all_gold_files_flag = 0
@@ -36,23 +36,23 @@ def main(input_files=[], file_formats=[], simple="False"):
         if osst_checking(gold_files["ostt"][0]) == 1:  # OStt checking
             continue
 
-        parity_state, error = parity_checking_between_ostt_reference(gold_files["ostt"][0], gold_files["ref"])
-        if parity_state == 0:
-            eprint(
-                "evaluation for ",
-                candidate_file[0],
-                " failed, the number of Complete lines (C) in ",
-                gold_files["ostt"][0],
-                " and ",
-                " ".join(gold_files["ref"]),
-                " are not equal",
-            )
-            eprint(error)
-            continue
+        evaluation_object = {
+            'ostt':gold_files["ostt"][0],
+            'references': gold_files["ref"],
+            'candidate': candidate_file[0],
+            'align': gold_files["align"],
+            'SLTev_home': sltev_home,
+        }
 
         state = check_time_stamp_candiates_format(candidate_file[0])  # candidate checking
         if state:
             continue
+
+        parity_state, error = parity_checking_between_ostt_reference(gold_files["ostt"][0], gold_files["ref"])
+        if parity_state == 0:
+            eprint(error)
+            normal_evaluation_without_parity(evaluation_object)
+            continue
 
         if (
             gold_files["align"] != [] and
@@ -72,15 +72,6 @@ def main(input_files=[], file_formats=[], simple="False"):
             " ".join(gold_files["ref"]),
         )
 
-
-        evaluation_object = {
-            'ostt':gold_files["ostt"][0],
-            'references': gold_files["ref"],
-            'candidate': candidate_file[0],
-            'align': gold_files["align"],
-            'SLTev_home': sltev_home,
-        }
-
         if simple == "False":
             normal_timestamp_evaluation(evaluation_object)
         else:
diff --git a/SLTev/__init__.py b/SLTev/__init__.py
index c19d775..a1f8257 100644
--- a/SLTev/__init__.py
+++ b/SLTev/__init__.py
@@ -5,4 +5,4 @@
 sltev_home = pkg_resources.resource_filename("SLTev", "")
 sys.path.insert(1, sltev_home)
 
-__version__ = "1.2.1"
+__version__ = "1.2.2"
diff --git a/SLTev/evaluator.py b/SLTev/evaluator.py
index caa0e81..e72d818 100644
--- a/SLTev/evaluator.py
+++ b/SLTev/evaluator.py
@@ -267,6 +267,44 @@ def time_span_bleu_score_evaluation(evaluation_object):
     except:
         pass
 
+
+def normal_evaluation_without_parity(inputs_object):
+    print_headers()
+    current_path = os.getcwd()
+    MovedWords = 1  # count of moving words to each side when we are using Time-based segmentation and word-based segmentation
+    references = read_references(inputs_object.get('references', []))
+    references_statistical_info(references)  # print statistical info
+    average_refernces_token_count = get_average_references_token_count(references)
+    candidate_sentences = read_candidate_file(inputs_object.get('candidate'))
+
+    evaluation_object = {
+        'candidate_sentences': candidate_sentences,
+        'language': 'en',
+        'SLTev_home': inputs_object.get('SLTev_home'),
+        'current_path': current_path,
+        'MovedWords': MovedWords,
+        'average_refernces_token_count': average_refernces_token_count,
+        'references': references,
+        'time_span': 3000
+    }
+
+    # bleu score evaluation
+    documantlevel_bleu_score_evaluation(references, candidate_sentences)
+    wordbased_segmenter_bleu_score_evaluation(evaluation_object)
+
+    #flicker evaluation
+    print("tot Flicker count_changed_Tokens ", int(calc_revise_count(candidate_sentences)))
+    print("tot Flicker count_changed_content ", int(calc_flicker_score(candidate_sentences)))
+    print(
+        "mean flicker across sentences ",
+        str("{0:.3f}".format(round(calc_average_flickers_per_sentence(candidate_sentences), 3))),
+    )
+    print(
+        "mean flicker across whole documents ",
+        str("{0:.3f}".format(round(calc_average_flickers_per_tokens(candidate_sentences), 3))),
+    )
+
+
 def simple_timestamp_evaluation(inputs_object):
     current_path = os.getcwd()
     MovedWords = 1  # count of moving words to each side when we are using Time-based segmentation and word-based segmentation
diff --git a/SLTev/files_modules.py b/SLTev/files_modules.py
index 9e5ff42..7171084 100644
--- a/SLTev/files_modules.py
+++ b/SLTev/files_modules.py
@@ -265,4 +265,3 @@ def quality_segmenter(evaluation_object, temp_folder):
     os.chdir("..")
     shutil.rmtree(temp_folder)
     return candidate_segments, mWERQuality
-
diff --git a/SLTev/quality_modules.py b/SLTev/quality_modules.py
index b7ac790..6fbffb1 100644
--- a/SLTev/quality_modules.py
+++ b/SLTev/quality_modules.py
@@ -50,7 +50,7 @@ def calc_bleu_score_segmenterlevel(evaluation_object, temp_folder):
     segmenter_sentence, _ = quality_segmenter(evaluation_object, temp_folder)
     sys = [" ".join(i) for i in segmenter_sentence]
     refs = merge_references_sentences[:]
-    bleu_sacre = sacrebleu.corpus_bleu(sys, refs)
+    bleu_sacre = sacrebleu.corpus_bleu(sys, refs, force=True)
     sacre_bleu_score = bleu_sacre.score
     return sacre_bleu_score
 