From 503d02f6d812b7dc6babdcbcce911daa7ba055a8 Mon Sep 17 00:00:00 2001 From: venvis <127123047+venvis@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:43:26 +0400 Subject: [PATCH] Update ECHR_metadata_harvester.py --- .../echr_extractor/ECHR_metadata_harvester.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/echr/echr_extractor/ECHR_metadata_harvester.py b/echr/echr_extractor/ECHR_metadata_harvester.py index 58eec23..3fdf371 100644 --- a/echr/echr_extractor/ECHR_metadata_harvester.py +++ b/echr/echr_extractor/ECHR_metadata_harvester.py @@ -129,8 +129,25 @@ def advanced_function(term, values): "languageisocode": basic_function } + start = link.index("{") - link_dictionary = eval(link[start:]) + end = link.rindex("}") + json_str = link[start:end+1].replace("'", '"') + + try: + link_dictionary = json.loads(json_str) + except json.JSONDecodeError: + + print(f"Failed to parse JSON: {json_str}") + link_dictionary = {} + pairs = json_str.strip('{}').split(',') + for pair in pairs: + key, value = pair.split(':', 1) + key = key.strip().strip('"') + value = value.strip().strip('[]').split(',') + link_dictionary[key] = [v.strip().strip('"') for v in value] + + base_query = 'https://hudoc.echr.coe.int/app/query/results?query=contentsitename:ECHR' \ ' AND (NOT (doctype=PR OR doctype=HFCOMOLD OR doctype=HECOMOLD)) AND ' \ 'inPutter&select={select}&sort=itemid%20Ascending&start={start}&length={length}'