From a383c78643619cc9e5bc185189929034ec69a611 Mon Sep 17 00:00:00 2001 From: Christian Newman Date: Tue, 26 Nov 2024 21:58:04 -0500 Subject: [PATCH] Small modifications due to weird changes in output in SWUM --- HTTPRequest | 2 +- SWUM | 2 +- ensemble_tagger_implementation/ensemble_functions.py | 1 + ensemble_tagger_implementation/preprocess_identifiers.py | 7 ++++--- srcSAXEventDispatch | 2 +- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/HTTPRequest b/HTTPRequest index a9f085c..0ff67db 160000 --- a/HTTPRequest +++ b/HTTPRequest @@ -1 +1 @@ -Subproject commit a9f085c0279c28c051e890182afad2f3f850ebe1 +Subproject commit 0ff67dbb7516e25d4b2e7bd133853361a84c85d2 diff --git a/SWUM b/SWUM index a5b3c57..4f8a381 160000 --- a/SWUM +++ b/SWUM @@ -1 +1 @@ -Subproject commit a5b3c57b7d5847e6625380f2298311a906ff9acf +Subproject commit 4f8a381c913606ffc9af9c79beb1eff8f5a9af66 diff --git a/ensemble_tagger_implementation/ensemble_functions.py b/ensemble_tagger_implementation/ensemble_functions.py index 404b6df..59d81a4 100644 --- a/ensemble_tagger_implementation/ensemble_functions.py +++ b/ensemble_tagger_implementation/ensemble_functions.py @@ -26,6 +26,7 @@ def Process_identifier_with_swum(identifier_data, context_of_identifier): split_identifier_name = '_'.join(ronin.split(identifier_type_and_name[1])) if Get_identifier_context(context_of_identifier) != CODE_CONTEXT.FUNCTION: swum_string = "{identifier_type} {identifier_name}".format(identifier_name = split_identifier_name, identifier_type = identifier_type_and_name[0]) + print(['java', '-jar', '../SWUM/SWUM_POS/swum.jar', swum_string, '2', 'true']) swum_process = subprocess.Popen(['java', '-jar', '../SWUM/SWUM_POS/swum.jar', swum_string, '2', 'true'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) else: split_identifier_name = split_identifier_name+'('+identifier_data.split('(')[1] diff --git a/ensemble_tagger_implementation/preprocess_identifiers.py b/ensemble_tagger_implementation/preprocess_identifiers.py index aa6aa32..b524c88 100644 --- a/ensemble_tagger_implementation/preprocess_identifiers.py +++ b/ensemble_tagger_implementation/preprocess_identifiers.py @@ -20,13 +20,14 @@ def Split_raw_identifier(identifier_data): return identifier_type_and_name def Parse_swum(swum_output, split_identifier_name): - code_context = swum_output.split('#') + code_context = swum_output.split(':') + print(code_context) raw_grammar_pattern = grammar_pattern = identifier = [] if code_context[0] == 'FIELD': - identifier = code_context[1].split('-')[1].split() + identifier = code_context[2].split('-')[1].split() raw_grammar_pattern = re.findall('([A-Z]+)', ' '.join(identifier)) else: - identifier = code_context[1].split('@')[1].split('|') + identifier = code_context[3].split('|') raw_grammar_pattern = re.findall('([A-Z]+)', ' '.join(identifier)) for pos in raw_grammar_pattern: diff --git a/srcSAXEventDispatch b/srcSAXEventDispatch index 2aeac9a..75ef0af 160000 --- a/srcSAXEventDispatch +++ b/srcSAXEventDispatch @@ -1 +1 @@ -Subproject commit 2aeac9a5101f08840956dd66cb827ed4f23efed0 +Subproject commit 75ef0af0e9d6ebba1816e674c6b13f6910689542