From d9b50b269f0e26fc3f13466edb1c4e7ad8e3eb02 Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Fri, 13 Dec 2024 14:12:57 +0000 Subject: [PATCH 1/7] Add simple Docker environment variable Signed-off-by: Afonso Oliveira --- ext/auto-inst/parsing.py | 328 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 ext/auto-inst/parsing.py diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py new file mode 100644 index 000000000..be1787874 --- /dev/null +++ b/ext/auto-inst/parsing.py @@ -0,0 +1,328 @@ +import os +import json +import re +import sys +from collections import defaultdict +import yaml + +REPO_INSTRUCTIONS = {} +REPO_DIRECTORY = None + +def safe_get(data, key, default=""): + """Safely get a value from a dictionary, return default if not found or error.""" + try: + if isinstance(data, dict): + return data.get(key, default) + return default + except: + return default + +def load_yaml_encoding(instr_name): + """ + Given an instruction name (from JSON), find the corresponding YAML file and load its encoding data. + We'll try to match the instr_name to a YAML file by using REPO_INSTRUCTIONS and transformations. + """ + candidates = set() + lower_name = instr_name.lower() + candidates.add(lower_name) + candidates.add(lower_name.replace('_', '.')) + + yaml_file_path = None + yaml_category = None + for cand in candidates: + if cand in REPO_INSTRUCTIONS: + yaml_category = REPO_INSTRUCTIONS[cand] + yaml_file_path = os.path.join(REPO_DIRECTORY, yaml_category, cand + ".yaml") + if os.path.isfile(yaml_file_path): + break + else: + yaml_file_path = None + + if not yaml_file_path or not os.path.isfile(yaml_file_path): + # YAML not found + return None, None + + # Load the YAML file + with open(yaml_file_path, 'r') as yf: + ydata = yaml.safe_load(yf) + + encoding = safe_get(ydata, 'encoding', {}) + yaml_match = safe_get(encoding, 'match', None) + yaml_vars = safe_get(encoding, 'variables', []) + + return yaml_match, yaml_vars + +def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str): + """ + Compare the YAML encoding (match + vars) with the JSON encoding (binary format). + Return a list of differences. + """ + if not yaml_match: + return ["No YAML match field available for comparison."] + if not json_encoding_str: + return ["No JSON encoding available for comparison."] + + yaml_pattern_str = yaml_match.replace('-', '.') + if len(yaml_pattern_str) != 32: + return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."] + + def parse_location(loc_str): + high, low = loc_str.split('-') + return int(high), int(low) + + yaml_var_positions = {} + for var in yaml_vars: + high, low = parse_location(var["location"]) + yaml_var_positions[var["name"]] = (high, low) + + # Tokenize JSON encoding + tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str) + json_bits = [] + bit_index = 31 + for t in tokens: + json_bits.append((bit_index, t)) + bit_index -= 1 + + if bit_index != -1: + return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."] + + differences = [] + + # Check fixed bits + for b in range(32): + yaml_bit = yaml_pattern_str[31 - b] + token = [tt for (pos, tt) in json_bits if pos == b] + if not token: + differences.append(f"Bit {b}: No corresponding JSON bit found.") + continue + json_bit_str = token[0] + + if yaml_bit in ['0', '1']: + if json_bit_str not in ['0', '1']: + differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'") + elif json_bit_str != yaml_bit: + differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'") + else: + # Variable bit in YAML + if json_bit_str in ['0', '1']: + differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'") + + # Check variable fields + for var_name, (high, low) in yaml_var_positions.items(): + json_var_fields = [] + for bb in range(low, high+1): + token = [tt for (pos, tt) in json_bits if pos == bb] + if token: + json_var_fields.append(token[0]) + else: + json_var_fields.append('?') + + field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields))) + if len(field_names) == 0: + differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}") + elif len(field_names) > 1: + differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}") + + return differences + +def safe_print_instruction_details(name: str, data: dict, output_stream): + """Print formatted instruction details and compare YAML/JSON encodings.""" + try: + # Print the instruction details without separating by category + output_stream.write(f"\n{name} Instruction Details\n") + output_stream.write("=" * 50 + "\n") + + # Basic Information + output_stream.write("\nBasic Information:\n") + output_stream.write("-" * 20 + "\n") + output_stream.write(f"Name: {name}\n") + output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n") + output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n") + + # Location + locs = safe_get(data, '!locs', []) + loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A" + output_stream.write(f"Location: {loc}\n") + + # Operands + output_stream.write("\nOperands:\n") + output_stream.write("-" * 20 + "\n") + try: + in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A') + output_stream.write(f"Inputs: {in_ops}\n") + except: + output_stream.write("Inputs: N/A\n") + + try: + out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A') + output_stream.write(f"Outputs: {out_ops}\n") + except: + output_stream.write("Outputs: N/A\n") + + # Instruction Properties + output_stream.write("\nInstruction Properties:\n") + output_stream.write("-" * 20 + "\n") + output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n") + output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n") + output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n") + output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n") + + # Scheduling Info + sched = safe_get(data, 'SchedRW', []) + if sched: + output_stream.write("\nScheduling Information:\n") + output_stream.write("-" * 20 + "\n") + output_stream.write("Operations:\n") + try: + for op in sched: + if isinstance(op, dict): + output_stream.write(f" - {op.get('printable', 'N/A')}\n") + except: + output_stream.write(" - Unable to parse scheduling information\n") + + # Encoding + output_stream.write("\nEncoding Pattern:\n") + output_stream.write("-" * 20 + "\n") + encoding_bits = [] + try: + inst = safe_get(data, 'Inst', []) + for bit in inst: + if isinstance(bit, dict): + encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]") + else: + encoding_bits.append(str(bit)) + # Reverse the bit order before joining + encoding_bits.reverse() + encoding = "".join(encoding_bits) + output_stream.write(f"Binary Format: {encoding}\n") + except: + output_stream.write("Binary Format: Unable to parse encoding\n") + encoding = "" + + # Now compare YAML vs JSON encodings + yaml_match, yaml_vars = load_yaml_encoding(name) + if yaml_match is not None and encoding: + differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding) + if differences: + output_stream.write("\nDifferences in encoding:\n") + for d in differences: + output_stream.write(f" - {d}\n") + print(f"Difference in {name}: {d}", file=sys.stdout) # Print to console + else: + output_stream.write("\nNo encoding differences found.\n") + else: + # If we have no YAML match or no encoding, we note that we can't compare + if yaml_match is None: + output_stream.write("\nNo YAML encoding match found for comparison.\n") + if not encoding: + output_stream.write("\nNo JSON encoding found for comparison.\n") + + output_stream.write("\n") + except Exception as e: + output_stream.write(f"Error processing instruction {name}: {str(e)}\n") + output_stream.write("Continuing with next instruction...\n\n") + +def get_repo_instructions(repo_directory): + """ + Recursively find all YAML files in the repository and extract instruction names along with their category. + """ + repo_instructions = {} + for root, _, files in os.walk(repo_directory): + rel_path = os.path.relpath(root, repo_directory) + if rel_path == '.': + category = "Other" + else: + parts = rel_path.split(os.sep) + category = parts[0] if parts else "Other" + + for file in files: + if file.endswith(".yaml"): + instr_name = os.path.splitext(file)[0] + # Store lowercase key for easy lookup + repo_instructions[instr_name.lower()] = category + return repo_instructions + +def find_json_key(instr_name, json_data): + """ + Attempt to find a matching key in json_data for instr_name, considering different + naming conventions: replacing '.' with '_', and trying various case transformations. + """ + lower_name = instr_name.lower() + lower_name_underscore = lower_name.replace('.', '_') + variants = { + lower_name, + lower_name_underscore, + instr_name.upper(), + instr_name.replace('.', '_').upper(), + instr_name.capitalize(), + instr_name.replace('.', '_').capitalize() + } + + for v in variants: + if v in json_data: + return v + return None + +def main(): + global REPO_INSTRUCTIONS, REPO_DIRECTORY + + if len(sys.argv) != 3: + print("Usage: python riscv_parser.py ") + sys.exit(1) + + json_file = sys.argv[1] + REPO_DIRECTORY = sys.argv[2] + + # Get instructions and categories from the repository structure + REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY) + if not REPO_INSTRUCTIONS: + print("No instructions found in the provided repository directory.") + sys.exit(1) + + try: + # Read and parse JSON + with open(json_file, 'r') as f: + data = json.loads(f.read()) + except Exception as e: + print(f"Error reading file: {str(e)}") + sys.exit(1) + + all_instructions = [] + + # For each YAML instruction, try to find it in the JSON data + for yaml_instr_name, category in REPO_INSTRUCTIONS.items(): + json_key = find_json_key(yaml_instr_name, data) + if json_key is None: + print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr) + continue + + instr_data = data.get(json_key) + if not isinstance(instr_data, dict): + print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr) + continue + + # Add this instruction to our list + all_instructions.append((json_key, instr_data)) + + # Sort all instructions by name + all_instructions.sort(key=lambda x: x[0].lower()) + + with open("output.txt", "w") as outfile: + outfile.write("RISC-V Instruction Summary\n") + outfile.write("=" * 50 + "\n") + total = len(all_instructions) + outfile.write(f"\nTotal Instructions Found: {total}\n") + for name, _ in all_instructions: + outfile.write(f" - {name}\n") + + outfile.write("\nDETAILED INSTRUCTION INFORMATION\n") + outfile.write("=" * 80 + "\n") + + # Print details for each instruction directly, no category splitting + for name, instr_data in all_instructions: + safe_print_instruction_details(name, instr_data, outfile) + + print("Output has been written to output.txt") + +if __name__ == '__main__': + main() From 1a14ba8c8535e16a5e628b9f81ae6c0317d54c01 Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Wed, 18 Dec 2024 09:01:28 +0000 Subject: [PATCH 2/7] Fix errors due to incorrect parsing of VM Signed-off-by: Afonso Oliveira --- ext/auto-inst/parsing.py | 86 +++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 40 deletions(-) diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py index be1787874..449a7c583 100644 --- a/ext/auto-inst/parsing.py +++ b/ext/auto-inst/parsing.py @@ -55,6 +55,7 @@ def load_yaml_encoding(instr_name): def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str): """ Compare the YAML encoding (match + vars) with the JSON encoding (binary format). + If the JSON has a variable like vm[?], it should be treated as just vm. Return a list of differences. """ if not yaml_match: @@ -75,8 +76,7 @@ def parse_location(loc_str): high, low = parse_location(var["location"]) yaml_var_positions[var["name"]] = (high, low) - # Tokenize JSON encoding - tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str) + tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str) json_bits = [] bit_index = 31 for t in tokens: @@ -86,6 +86,13 @@ def parse_location(loc_str): if bit_index != -1: return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."] + normalized_json_bits = [] + for pos, tt in json_bits: + if re.match(r'vm\[[^\]]*\]', tt): + tt = 'vm' + normalized_json_bits.append((pos, tt)) + json_bits = normalized_json_bits + differences = [] # Check fixed bits @@ -103,7 +110,6 @@ def parse_location(loc_str): elif json_bit_str != yaml_bit: differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'") else: - # Variable bit in YAML if json_bit_str in ['0', '1']: differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'") @@ -117,7 +123,8 @@ def parse_location(loc_str): else: json_var_fields.append('?') - field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields))) + # Extract field names from something like varName[index]. After normalizing, vm and others won't have indices. + field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields))) if len(field_names) == 0: differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}") elif len(field_names) > 1: @@ -128,23 +135,19 @@ def parse_location(loc_str): def safe_print_instruction_details(name: str, data: dict, output_stream): """Print formatted instruction details and compare YAML/JSON encodings.""" try: - # Print the instruction details without separating by category output_stream.write(f"\n{name} Instruction Details\n") output_stream.write("=" * 50 + "\n") - # Basic Information output_stream.write("\nBasic Information:\n") output_stream.write("-" * 20 + "\n") output_stream.write(f"Name: {name}\n") output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n") output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n") - # Location locs = safe_get(data, '!locs', []) loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A" output_stream.write(f"Location: {loc}\n") - # Operands output_stream.write("\nOperands:\n") output_stream.write("-" * 20 + "\n") try: @@ -159,26 +162,26 @@ def safe_print_instruction_details(name: str, data: dict, output_stream): except: output_stream.write("Outputs: N/A\n") - # Instruction Properties - output_stream.write("\nInstruction Properties:\n") - output_stream.write("-" * 20 + "\n") - output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n") - output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n") - output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n") - output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n") - - # Scheduling Info - sched = safe_get(data, 'SchedRW', []) - if sched: - output_stream.write("\nScheduling Information:\n") - output_stream.write("-" * 20 + "\n") - output_stream.write("Operations:\n") - try: - for op in sched: - if isinstance(op, dict): - output_stream.write(f" - {op.get('printable', 'N/A')}\n") - except: - output_stream.write(" - Unable to parse scheduling information\n") + # # Instruction Properties + # output_stream.write("\nInstruction Properties:\n") + # output_stream.write("-" * 20 + "\n") + # output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n") + # output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n") + # output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n") + # output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n") + + # # Scheduling Info + # sched = safe_get(data, 'SchedRW', []) + # if sched: + # output_stream.write("\nScheduling Information:\n") + # output_stream.write("-" * 20 + "\n") + # output_stream.write("Operations:\n") + # try: + # for op in sched: + # if isinstance(op, dict): + # output_stream.write(f" - {op.get('printable', 'N/A')}\n") + # except: + # output_stream.write(" - Unable to parse scheduling information\n") # Encoding output_stream.write("\nEncoding Pattern:\n") @@ -194,28 +197,31 @@ def safe_print_instruction_details(name: str, data: dict, output_stream): # Reverse the bit order before joining encoding_bits.reverse() encoding = "".join(encoding_bits) - output_stream.write(f"Binary Format: {encoding}\n") + output_stream.write(f"JSON Encoding: {encoding}\n") except: - output_stream.write("Binary Format: Unable to parse encoding\n") + output_stream.write("JSON Encoding: Unable to parse encoding\n") encoding = "" - # Now compare YAML vs JSON encodings + # compare YAML vs JSON encodings yaml_match, yaml_vars = load_yaml_encoding(name) - if yaml_match is not None and encoding: + if yaml_match is not None: + output_stream.write(f"YAML Encoding: {yaml_match}\n") + else: + output_stream.write("YAML Encoding: Not found\n") + + if yaml_match and encoding: + # Perform comparison differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding) - if differences: - output_stream.write("\nDifferences in encoding:\n") + if differences and len(differences) > 0: + output_stream.write("\nEncodings do not match. Differences:\n") for d in differences: output_stream.write(f" - {d}\n") print(f"Difference in {name}: {d}", file=sys.stdout) # Print to console else: - output_stream.write("\nNo encoding differences found.\n") + output_stream.write("\nEncodings Match: No differences found.\n") else: - # If we have no YAML match or no encoding, we note that we can't compare - if yaml_match is None: - output_stream.write("\nNo YAML encoding match found for comparison.\n") - if not encoding: - output_stream.write("\nNo JSON encoding found for comparison.\n") + # If we have no YAML match or no JSON encoding, we note that we can't compare + output_stream.write("\nComparison: Cannot compare encodings (missing YAML or JSON encoding).\n") output_stream.write("\n") except Exception as e: From afc753e9a8fc38346d7c87105028db487bc442d3 Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Thu, 19 Dec 2024 08:06:33 +0000 Subject: [PATCH 3/7] First Refactor to pytest Signed-off-by: Afonso Oliveira --- .../conftest.cpython-310-pytest-8.3.4.pyc | Bin 0 -> 549 bytes .../__pycache__/parsing.cpython-310.pyc | Bin 0 -> 9093 bytes .../test.cpython-310-pytest-8.3.4.pyc | Bin 0 -> 1994 bytes ext/auto-inst/parsing.py | 77 +++++++----------- ext/auto-inst/test.py | 56 +++++++++++++ ext/riscv-opcodes | 2 +- 6 files changed, 88 insertions(+), 47 deletions(-) create mode 100644 ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc create mode 100644 ext/auto-inst/__pycache__/parsing.cpython-310.pyc create mode 100644 ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc create mode 100644 ext/auto-inst/test.py diff --git a/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b8bc1d08667e8e7a0f516d6da2fb738b7cfd03e GIT binary patch literal 549 zcmYjOJx{|h5Vg~!X$zG)z`&LPRmzZNV?tFC8-jw=i7rtZJFQC^N45i`Vq|4w;vevr zydnnv0u$$yRz2xH-Z}5xIp*DN2Z6nR%;w*azYN$n3&03m_8|~NaE7M1&ep17A&ge` z)WL{Yr16TTO@b%Lw}0SUG)}Nd|7L0j{1RNALU=(-P%}JYzcug%JAj)TOb{r!c#3;!LA|TV?btJ?zNMRSq8QJ$y`WjK?gW9L1&;_5>$81| z=}HDtl?P`usjGzNDrPy$=idFr&2=p)eXQFtA-piM0GdTCBo&`XxNwKxVYuKc8pbKl zCFdcbfcF8bCxnn0x}EfvkK!<@qp!0g)k>oRH{Ky>0VKpGTMzWze(BE7}lVFO%(%Y7Hx4jOKH`7F|Twh@?v|yTU Qov%14Gdip{F|Xf8fBQq3DgXcg literal 0 HcmV?d00001 diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..757e1865563e2916a5b76637ec4bd05e7c6b60ae GIT binary patch literal 9093 zcmb7K&2Jn>cJJ!$nd#~I;P6Y|mfEr{YHV_3TSzwM%F@b`Y%8`YuPCpf5ALKn)g)UQ zP7kYlD6vuFIG_OA2P0k}xx`RF0=YOqZaD<|4nO6dWJJY z%Sd*JuCA`C_p0jEt5?7Gib1JlYxte{=f-OGbxr#6_hQ z>RZ2~tG98-z*}#c>zO+lmbS^>$@-d`V1#ej*FEh?dhEiQ}(0 ztsrt%f=-J&fp9z_0#U0PiS37eD~#M$!%IvxU;~^{VRAQe6XSummE?L{zS9IP*xuXc z)`E5KoVyaV!XP-uJ>fk#C;YInIoE0VE566)`0_dLQFPAjM8O>KKi75zI$Eu@w-PJF z4C?5mKtd=Y(RG`ZSr&g56I9S(Ncul&X5mpX`%h#dZCmRyNQ>=iT}TX4({~x9q;fk3 zXB;x55^i)2Jf>t+Im9^Z$HFH z=-kfxY3&DGy);KupP8GMDE#9GLyF(0@x10_wt8??($2aQN7Zz{b{N&25 z>o*n_RbP>CTj5F|zzTX9rky0Lp0L`&UZ%F?t~aat!a~ACV)ciUWK_pVe$y4cyWI4` zYB@1NFG@1a;GriH!{t2539sFBp|j)P>p9VbFfrS1w3e9n1HYA&Rg&DI7 zN!AZZ8i^USy;fqUlk7wYn_@*~;#>c`qMLfrD6$jJC@nHe7q5YC#0VWm0d-;!!#~qC7@#o2*(xlV z)I*!=DyQv0k-536V@oh;Y%`vDV2D#Zn|^((@!S)GTBf$NYx){C7;UtPZWbFV%k87u zHoK>Hb6rd3q_v{U3`z?}wF}zDOr&F@7RlIkq4r&ym%0U+2X(1il=hA(3$nP9ZtDUs z|AE#mY`iNAd<!@@mgQK= zal@0Pt__+>l;xRi8uQXu8a8w3OYMjT!!Z7Zw!)O%w}|UD>rTjt@GoU$$K=yrQoo0u zz(6o<<4@!SKMaaVInIy3wEUBt#G8p<4z)ch$3bySP9i=2B-f(@1?KXPL+wsHzn$Xy z??dfg=yA&QIAwNnyBg-eG54ud-k95Tl$DS>$!S|h?eVYJ7YwsHiPLOoR^KpT$!SPO z%|mhgFZ$-E(tKp8m2&utJ9#<1%XW20;}CS<5HtekAn4Djv7MGvnBPk>ht!bPx7rh! zLrCb?y|(2CCfdF?)b?e02yI^(qOej5S%L986aRxUN>_q)93GysM9P%)_k(jbHBqnO z6cl0AdFV%}y&`isFmGp4a-Rr^%gV3_y6;+1x?%`<*aCo0Qkh^r&?~iPB&N=kUr3&O{ukJ(CyDEd;66Oh=3( z5-iGwXPd+FpZZasJJkbH!~DDoDHboVI{P6wR&L$ni^7f}TY@;YlPoc!79!X=k&p zj@4z`3@P+N9a{Q=m_@6qo@Byy(~lCvZxNu#HG@VSCN(kF{MEI@fE_((^vZ-1ygC}H z1LIxlkw9RXRapFHZZR(_Q|R_7p^&G~@QdFXT@D_2p#oHHyY0Dx zj!P)W`8SK64*tL7{kVEC$zZs z{}JJUG&bL%37npHppT=NLWzT>rx0aLkfgBU?Dd4b5g(gRl|9w95ZL1p6pmWY#FZIm zxf7+@O&f1!kD>4Y!Av~UE7{+bM7>W4T~2w<{GcqwJVrE-m-sOakR|%^keI|zQ22=2 zT}HCUF2ygN9afC$b2MX*IdT4HJ!s;@v(J%Dy1P@T?YGjYV))p96Z6hT@C*Dl?Rl~R zC_B;v<^7gq3smc>E$E=eMgdCEg_qS0l({HxSvd}Oz%O;}OTqGuzhM4YFV6MaBZhi3T-wIv1?LA*iC-lQhkD0twt!X&Sl z(omB;(mDp`JxW-VOtr|;HvQP+_5C*DZDMed7!-!=V`8ZeL7!w4-=x?lQNy&*cZ#*f zR5^X7l1mEettwOWRMo{L>f{Ii5NxI!5oodB3_F$eFHO#j@{k06~gsBQv$E8ujh?-X<@Ws8P5iB`rXu)9}Z1nUCPxiGP*3ot(^Lw8!}IT^+rgP+u?f zzD_IsU1AY{5Fop-gkZ%9v?roV@rTO_;n;BN7^GJEey@`MWW=Vy7p}?D##Q( z7XXI~{B(x_JdG<3FUi7yLo1cu%S0&)Xmd;2lt!SE0MhU$OR|KL1*}{tT{-$voL*5> zCCvC}x2#Ie$nt=9RN^yAi_TK%6EiB`ALG@pbq1~6INvHLidTt8ZxpX7eo9*@2T%C) zB0S-!@(tVif@$3`rp3n;)ojWa@*3M_s>gxm&ZV@sLERnuY>arMxrgkbR;|TXKNdg~ zJv>KHX4os{UIbKy(-*F8Z}+Ri=-kKw9@Y|O$J5UBA%FU6N@?$%fBSd-+3}S9jvM+7 zaNPHI=h4I5bAEO_xd1RS?~MH7$;)BrtuG_A^R7awRG;K8hwFZWhXFATD6PE{t&{Zea=bi6ZVp7~7K0CzeBZ{~v zKrwVe_7IjGzh__BcM;|fLU*qxy%0{3AF7aynijysh~mTjiC?%z!9PO#a8Oj!;SChO zawTX*eyc-a9pzHB5Xl?fW3^h%wyWbw=Ai)SQ1KE$5H>Lhbc*+By(q>hGL(EkH3s2+ z#B4S_;CqEnR8&Hpgh@uJ9NKJ!Nj8jl(199Wd(`mSgg>g4!BLr%PzR+VXyCc)=n#rd zo3O<}vK_N9dpd0j= zM&9Z@8}kJS!w889cwjRdLCgtu41Y!aG;;K~2y>TE!m$PzM3Rko27*k_gukoLPY1f*=wl;*+608j2?J;zcSbfgOC)%{1V5P+_JK)UJPz z;jcg#5->N-DYfPmKtX84=lcJc8Sxb=8swk#f3${qJPHCXBSZeS3Uuo`EDddU4Xlkp z*9E&&+bivT86#!*TFds<+@>{`3{k3@%;IhtKDntW>v6W9TPYQwfEfvqy zW>PydQ%8J)&TRyssRCHv4>}<z*@-Bs*lr9t68;7b#DOCPy;I5T|n08D)XSj^gbA4m+BWN`(2 z%_uJs00p{3_=XT0P!|ySXBp}%%@OJXPR=3>Xsa;e@24R~Ak>^Rw-LUTd9HUfu?Bt_ z70k;%F{0xA63-D<+_)jlowCdm0*=O{9aXmV@U#84)`7O!o$>VkgHRx7v-ja8aQm^w z3tyuIv)6eMvws)i;=;}(FDcAPxiaOVDVarp66hBtW3mWrS?T#eS(v>N#$E!}rP^^> z8q{w5jVhbi=QgQwL_@(Cgo~qTi4e0a(7n#a-+xcm|28aZ%x+4;S4gEQAXiWUkZ}N<)Cd ztWuKm)JF^8FY=s$I;Q(?s5R~l6^u@a;vXaaGgOP{i@Hbb*>(VlO>BHVY~%+cT@d8c z##1Tiz_G3FoM@o)PCRw>+B=`TH$Q^IXX4qJ-n|w_ zM+Yr}b)o65FLU?eNwf@!RZ)vih)xT)LWFk);mLkOaHHcoVGrZuH?J>Vnfuv@2o^i* z>j+uf;*ZcMK5pL%V8w=8gzynyu3$#%Xd&)Xv(xs~Yqu_6|M1#XXXqjh`FC&J{OB@W z%GvQxe`n9;{~)mA<2Mu}b10a%?BSY{P+^2O&XDRML8eiiapDN&HYlMWGVbOO!KFJr zm(!68y%2<@L_o<$l#tOB6rxe65I5E-swAj61TD=l5Rq7?LWMjD7gfi4(J4|h@t9Z? zQf*h4bqQNhkr(k9)l`63hT&b9jH#HEQus9XWc5KvVyW|wF2#HIgLITA&`DiU3=1oj z0#*VL2Q|eWD8+A39o|A>T9!e9CSXEfJ|Mhj=I3P~%Cf?Frda_Zw15wFg(&G=F)Bt8 zm@!3%MTHk_AjdKen-jk&kC{yT6&N@0?Eehp2#;UE6B0U%aKzk)(dP`eP;?Zi10Z6! z`B8x-s^tdIplZiWsw{}JeKiuRUsK{=YMUMoFLNrV@j3$AFasRbFpw2Ok`bx&z)iC-YW{`r<(*>ug7 zl9;Yo-Bb#enBF7ce3C)c5Pv~LN(ZvI6i4{=;G&r8uHSl-w(bO)LhKMMf%h_^dN{|Q ziN8j1V%6)^u3ksqG&j7|NTw7F6@N~>T&F}0rqBqQO+ZY#_7-Z`r>IWt0ktK_&J28@ zRO^+$MEaX<7^VL-iTwf49Y(!AG=72&X{gh>upaOZJnGOE^`E0rOz~sX4gLVHXtRq=B+GnUBvLV%Vk=xe9(-;444RVA)o35hF~z5N zhL>Xp#*TLO-MLpc^5ytK-l`qyo;ovE`xJD3LO(kv7$ca$%B7iKyJxc+*G}zDTzksR zs?+Q3E0K>UJY!YV>q%CKG3By~M5&8X_w|vk-uz1Oe3yLwcx#i~2gM<&t}R5{psu(U z@|fJ^lBIS8t5>!f<3^c0O7z~4bOYn3BA@VG)81uT6q<|NI6@h(00k2yJE|FY`-11D zRY;!eu4e+to|Q~#(>_0^3C~VxI*U^_$NvS%RmkIu;b2cp*&vySTnRCtOtS9=lB@Kf zSLS@e8ST-X0Xx!zq|~CvbEO9wcIX$!rlXiH3&2b7sg_0N3Usj(=PqdK??^qqyE6`$|8((H)oYGa3hDr>-HxdL%jTTegT-h8o$mGF;!GyM(L7~p z_!6faNVg^7V`%C+bi2T+Z#z&P@Eo1mVu4PvbZY#g^UMK^@C)4QI;I7X@WMn&CsM0J zuJ=Ib^luw4<2h^Y$f8>VmnK@zxYDVZ78@Vgee9;!QT4IC5#tLC##4*m81NhKu)XAf z3)x@KM4Du30|sX<&XGC`me7mn76iHR3N9>>`ryp!c)fYgUC5MzH zUE>1`nT9fC3GoCXAVs1SQ)H*qq=^7jEd|RIvJK+1P`Z({flGUhjb9ww=#4i?cqT0Z z(r@JK2;zi-B?^`s4~R!0%SFbuO|0s+o3bD~)*!Z>y)VX>OqH27cwz!5fSTpnv>F2w z+2o9y@xNiY`TG371>V*B&_#IF4V@6`QtU#HV<*Db{tmqmhkghZtC6|#T~O_=m~f}$ znTCsN;ycCh_B*GbZhg?n`zkxn`z)F0$o#A&bDcIb$N_+u*s$lL_pf4mjRq s(*zTKq)W*bd9nLn3e(nV*-$WCpTf1z*oOT-Dx0~RD8w=LAdYzLA9p@FdH?_b literal 0 HcmV?d00001 diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py index 449a7c583..d227061d3 100644 --- a/ext/auto-inst/parsing.py +++ b/ext/auto-inst/parsing.py @@ -3,9 +3,9 @@ import re import sys from collections import defaultdict -import yaml +import yaml -REPO_INSTRUCTIONS = {} +REPO_INSTRUCTIONS = {} REPO_DIRECTORY = None def safe_get(data, key, default=""): @@ -123,7 +123,7 @@ def parse_location(loc_str): else: json_var_fields.append('?') - # Extract field names from something like varName[index]. After normalizing, vm and others won't have indices. + # Extract field names field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields))) if len(field_names) == 0: differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}") @@ -162,27 +162,6 @@ def safe_print_instruction_details(name: str, data: dict, output_stream): except: output_stream.write("Outputs: N/A\n") - # # Instruction Properties - # output_stream.write("\nInstruction Properties:\n") - # output_stream.write("-" * 20 + "\n") - # output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n") - # output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n") - # output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n") - # output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n") - - # # Scheduling Info - # sched = safe_get(data, 'SchedRW', []) - # if sched: - # output_stream.write("\nScheduling Information:\n") - # output_stream.write("-" * 20 + "\n") - # output_stream.write("Operations:\n") - # try: - # for op in sched: - # if isinstance(op, dict): - # output_stream.write(f" - {op.get('printable', 'N/A')}\n") - # except: - # output_stream.write(" - Unable to parse scheduling information\n") - # Encoding output_stream.write("\nEncoding Pattern:\n") output_stream.write("-" * 20 + "\n") @@ -234,18 +213,11 @@ def get_repo_instructions(repo_directory): """ repo_instructions = {} for root, _, files in os.walk(repo_directory): - rel_path = os.path.relpath(root, repo_directory) - if rel_path == '.': - category = "Other" - else: - parts = rel_path.split(os.sep) - category = parts[0] if parts else "Other" - for file in files: if file.endswith(".yaml"): instr_name = os.path.splitext(file)[0] - # Store lowercase key for easy lookup - repo_instructions[instr_name.lower()] = category + relative_path = os.path.relpath(root, repo_directory) + repo_instructions[instr_name.lower()] = relative_path return repo_instructions def find_json_key(instr_name, json_data): @@ -269,21 +241,21 @@ def find_json_key(instr_name, json_data): return v return None -def main(): +def run_parser(json_file, repo_directory, output_file="output.txt"): + """ + Run the parser logic: + 1. Get instructions from the repo directory. + 2. Parse the JSON file and match instructions. + 3. Generate output.txt with instruction details. + """ global REPO_INSTRUCTIONS, REPO_DIRECTORY - - if len(sys.argv) != 3: - print("Usage: python riscv_parser.py ") - sys.exit(1) - - json_file = sys.argv[1] - REPO_DIRECTORY = sys.argv[2] + REPO_DIRECTORY = repo_directory # Get instructions and categories from the repository structure REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY) if not REPO_INSTRUCTIONS: print("No instructions found in the provided repository directory.") - sys.exit(1) + return None try: # Read and parse JSON @@ -291,7 +263,7 @@ def main(): data = json.loads(f.read()) except Exception as e: print(f"Error reading file: {str(e)}") - sys.exit(1) + return None all_instructions = [] @@ -313,7 +285,7 @@ def main(): # Sort all instructions by name all_instructions.sort(key=lambda x: x[0].lower()) - with open("output.txt", "w") as outfile: + with open(output_file, "w") as outfile: outfile.write("RISC-V Instruction Summary\n") outfile.write("=" * 50 + "\n") total = len(all_instructions) @@ -324,11 +296,24 @@ def main(): outfile.write("\nDETAILED INSTRUCTION INFORMATION\n") outfile.write("=" * 80 + "\n") - # Print details for each instruction directly, no category splitting + # Print details for each instruction directly for name, instr_data in all_instructions: safe_print_instruction_details(name, instr_data, outfile) - print("Output has been written to output.txt") + print(f"Output has been written to {output_file}") + return output_file + +def main(): + if len(sys.argv) != 3: + print("Usage: python riscv_parser.py ") + sys.exit(1) + + json_file = sys.argv[1] + repo_directory = sys.argv[2] + + result = run_parser(json_file, repo_directory, output_file="output.txt") + if result is None: + sys.exit(1) if __name__ == '__main__': main() diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py new file mode 100644 index 000000000..6f0b7b21a --- /dev/null +++ b/ext/auto-inst/test.py @@ -0,0 +1,56 @@ +import pytest +import os +from parsing import run_parser + +@pytest.fixture +def setup_paths(request): + json_file = request.config.getoption("--json_file") + repo_dir = request.config.getoption("--repo_dir") + + # Resolve absolute paths + json_file = os.path.abspath(json_file) + repo_dir = os.path.abspath(repo_dir) + output_file = os.path.join(repo_dir, "output.txt") + + print(f"Using JSON File: {json_file}") + print(f"Using Repository Directory: {repo_dir}") + print(f"Output File Path: {output_file}") + + return json_file, repo_dir, output_file + +def test_run_parser_mimic_old_behavior(setup_paths): + json_file, repo_dir, output_file = setup_paths + + # Run the parser (similar to old behavior) + result = run_parser(json_file, repo_dir, output_file=output_file) + + if result is None: + print("WARNING: No instructions found or an error occurred. (Mimic old script warning)") + # You could fail here if this was previously considered a hard error + pytest.fail("No output produced by run_parser.") + + # Check output file content + if not os.path.exists(output_file): + print("ERROR: output.txt was not created. (Mimic old script error)") + pytest.fail("Output file was not created.") + + with open(output_file, 'r') as f: + content = f.read() + + # Mimic old behavior: print warnings if no instructions found + if "Total Instructions Found: 0" in content: + print("WARNING: No instructions found in output.txt (Mimic old script warning)") + + # Check for encoding differences + # In the original script, encoding mismatches were printed like: + # "Encodings do not match. Differences:" + # If we find that line, we mimic the old error messages + if "Encodings do not match. Differences:" in content: + # Extract differences lines + lines = content.splitlines() + diff_lines = [line for line in lines if line.strip().startswith("-")] + print("ERROR: Encoding differences found! (Mimic old script error)") + pytest.fail("Encodings do not match as per old behavior.") + + # If we reach here, we mimic the old success output + print("No warnings or errors detected. Test passes but mimics old success behavior.") diff --git a/ext/riscv-opcodes b/ext/riscv-opcodes index 5ce8977a5..9226b0d09 160000 --- a/ext/riscv-opcodes +++ b/ext/riscv-opcodes @@ -1 +1 @@ -Subproject commit 5ce8977a5961a6bbfc1638e6676e60489665d882 +Subproject commit 9226b0d091b0d2ea9ccad6f7f8ca1283a3b15e88 From 29ec73767e4ceac81fb58ca9793506d88f237354 Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Thu, 19 Dec 2024 08:19:59 +0000 Subject: [PATCH 4/7] Allow 16 bit instructions for C extension Signed-off-by: Afonso Oliveira --- .../__pycache__/parsing.cpython-310.pyc | Bin 9093 -> 9422 bytes .../test.cpython-310-pytest-8.3.4.pyc | Bin 1994 -> 1944 bytes ext/auto-inst/parsing.py | 30 +++++++++++++----- ext/auto-inst/test.py | 2 +- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc index 757e1865563e2916a5b76637ec4bd05e7c6b60ae..09227159071d37a12bdaf3dbfea20c3722291076 100644 GIT binary patch delta 1997 zcmZuyZ)hAv6yLYEx3{;qdw-M5U22ofB{g1?G-#@gwzaL5)*4ELwu-gwxykNb&m?!r z+$L?$S+4e~Nu@~WCc@a0f{6H~zS(Q_1n1b_zL|ON zH}CyscJ9r?-yR8976CYAT3SRqTh zEVyz}a04|eJ}aD#y9$-5V(Dq24yQyn=&CeG)x1apI2uk1dxV8C$G0w75$eNbLIuIq zsANS`f~(UIGU#rYYU_Tg({NsF00>mKqT@Kba2%}|rbDc_mqdckbP1SF0Mn^qK1dd! z8j>V%>8KmSg=DLP#u_5IWOceR8bRVHjkSmumT1J)FedHOmx z@vlNfrmEFFBdh{RdlY>%iD_hL5E17P`#oU?)`DJQf1oy9s10r*7rM?jc@w#2gk zCblNG61wjjosrP@LYwb)nn_{)Lv0~SD}=_63E5%uAP54>C?D5U1KW|jQ7ly)k}H{W zwo!3R;#7=tMdz$BcrZ6OY&d7_lHm~Bbhz2_U7U7`<&t4mj7Og^rivIaTP_uLczl!S zMa~r~HimjpMTa|%J=X5%d9tq+H_;Ri<~F1FZEhL}f58X|v&mq8I5ce7=jZKd$F_`W zH@g@-38U;z@DLnj8TIuLmLP22!&V(&H`D>Knqaq81|?Pq{aC98Uo=V4oSLmP^a}^u2PL^q~vO={*CH2c_m_?NY%x z+l*c?=Vo*BrsLS8l*8R2kF!;Mt31X@cd`wAxBN6W_p#sgoV=f#huF*E?7k|Rk}87) zGQ@qFs6h%8QIZshLL4-o1PcDbpyFr>B%cNv`bP1QM9mjvx5B-H9pV6p*pFibvOri& z`ohqIQ5g^~N+L1&db`-Ya7KxvIIB>7bt4-Y5wj<#iih!pqfvo|s7BSggjbo4W0Q_V zbv)W(@3>AfH+BGavRkoHSlReBHZID?Ip^Vxk&aJb8~eIu0Ltvs)LyvDHd9;S>P8^_ z8Ne^>tL|;UHh%7Y#fQh{xt_OSnBDGq0jAkNZ=XEHx#!qK??<((+)QzBIP@SirPnIu zQqw4XEFv`JieuS?vann6yV z%NJ*DvfRx%LU?HB7aeb?33(FW5pS@%@e-DP$LJSDxYl^L(dqhD delta 1616 zcmZuxO>7fK6rMM`Ua!~Q^~R24n}o!63@8#Pln6D2CWWdEr4fh zD5bMC$f-gqRSF%Ds-m=Ug*b4lTvcihJ)$03`ge;|IP}C7ajGiOH|rcysVnW9Z{B|2 zd-LAR?8D<99dqVw+ambv{^R15vHxqQ6sl`#zV)PWjN+?&wOf=BL24leQ*fFHTC*4p z5ZNG1V|tVNng>E_01M4%ypI{}*&V{d>zZORlSP(^uTK*{B;Yl2KH(ce7Y4Jkqygus zANI|<2n%zPiV(V^S@JTe?(yh`#$tkEg2)u{EoS>s5ka}_$Aq<^i>QcA(k1{AQRWO2 zKUy6_6vsuBnX7zhf{W1Gi@*rBuuz@EgpMD_=poi2;!Vn{EaBJV!p4w}h_^#lE5i0I z6uF+kLJhU*^Q%N?%)UV~Buo5`6*37d$vT%!6}AC>LL_Qm2zNtgsT(SB*Se+_SG_J0 zERBk!=wO*e!XJtxj=;a)#?9762TFE`B--xv@RkrISk2Gdh&}($3j#md#@o3SDYO+S zv=MF+EP`S6o$Z!maVgIbrU+xF>ZC#ObN4E&0M^y}-@4X-n<9n%2`ULfK2>wp*T$Y1 zY*votHzFd{geJv)bm2MZ!c&o+p{O4W7FQ6RSYW>hqt%3YhpZbxguf1Lk)0VG$;Y;l zi=qq3yS7QpwnVt6$-wTbKgw0}D|s(+-TE8$aR-m=lbk$c^*ej^<|<1Im5W}5<%SN( z8S5R`D<4>6FedHj2X3EYf-=wXx%!fn(IQ3OcTouyc_Q|wd=$+N@aw)WVMGJ4 zNiSSz&($^IN6;)Oa8!O{?~6p0NM8*3#$Uto$uK3@g&gOh8OdXL_ODx{;Xyn3d16 zBYnT4s@91{*E_LD3oCH2JbI1(qlfrv>(q`P03OSD&n{SN9qf5mgBf|V?_=ndJ-Jix zo}A0&jgrcIN#4qR8C_K{qfAwf=6h0}g7eCpP-aaTF6Z;zuZ|Tp36ngm1Ru(u@@KKgVxgZtBl39R9Nd-n3cntC z5x+jczbfH3(F_2;qXwg$LGARWUglGayc)pD(zM6#Du%{C$AK@)Z~JrO2k_|D5;%8) zu2|FphpI!MKj-(A&@7ea%F~xiB}|}xXk=K}2*{meBxFo))G%vm&v8Xt;C_XncH#0dtKPM$V aDK#UpEHl4ovlnYDqp}iVg_Ae3JqG}2swVmX diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py index d227061d3..bd5fac67c 100644 --- a/ext/auto-inst/parsing.py +++ b/ext/auto-inst/parsing.py @@ -52,10 +52,14 @@ def load_yaml_encoding(instr_name): return yaml_match, yaml_vars -def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str): +def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str): """ Compare the YAML encoding (match + vars) with the JSON encoding (binary format). If the JSON has a variable like vm[?], it should be treated as just vm. + + If instr_name starts with 'C_', then treat the instruction as 16 bits long. + Otherwise, treat it as 32 bits long. + Return a list of differences. """ if not yaml_match: @@ -63,9 +67,12 @@ def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str): if not json_encoding_str: return ["No JSON encoding available for comparison."] + # Determine expected length based on whether it's a compressed instruction (C_) + expected_length = 16 if instr_name.startswith('C_') else 32 + yaml_pattern_str = yaml_match.replace('-', '.') - if len(yaml_pattern_str) != 32: - return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."] + if len(yaml_pattern_str) != expected_length: + return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."] def parse_location(loc_str): high, low = loc_str.split('-') @@ -76,16 +83,18 @@ def parse_location(loc_str): high, low = parse_location(var["location"]) yaml_var_positions[var["name"]] = (high, low) + # Tokenize the JSON encoding string. We assume it should match the expected_length in bits. tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str) json_bits = [] - bit_index = 31 + bit_index = expected_length - 1 for t in tokens: json_bits.append((bit_index, t)) bit_index -= 1 if bit_index != -1: - return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."] + return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."] + # Normalize JSON bits (handle vm[?] etc.) normalized_json_bits = [] for pos, tt in json_bits: if re.match(r'vm\[[^\]]*\]', tt): @@ -96,8 +105,8 @@ def parse_location(loc_str): differences = [] # Check fixed bits - for b in range(32): - yaml_bit = yaml_pattern_str[31 - b] + for b in range(expected_length): + yaml_bit = yaml_pattern_str[expected_length - 1 - b] token = [tt for (pos, tt) in json_bits if pos == b] if not token: differences.append(f"Bit {b}: No corresponding JSON bit found.") @@ -115,6 +124,11 @@ def parse_location(loc_str): # Check variable fields for var_name, (high, low) in yaml_var_positions.items(): + # Ensure the variable range fits within the expected_length + if high >= expected_length or low < 0: + differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.") + continue + json_var_fields = [] for bb in range(low, high+1): token = [tt for (pos, tt) in json_bits if pos == bb] @@ -190,7 +204,7 @@ def safe_print_instruction_details(name: str, data: dict, output_stream): if yaml_match and encoding: # Perform comparison - differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding) + differences = compare_yaml_json_encoding(name, yaml_match, yaml_vars, encoding) if differences and len(differences) > 0: output_stream.write("\nEncodings do not match. Differences:\n") for d in differences: diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py index 6f0b7b21a..eb0c69191 100644 --- a/ext/auto-inst/test.py +++ b/ext/auto-inst/test.py @@ -18,7 +18,7 @@ def setup_paths(request): return json_file, repo_dir, output_file -def test_run_parser_mimic_old_behavior(setup_paths): +def test_llvm(setup_paths): json_file, repo_dir, output_file = setup_paths # Run the parser (similar to old behavior) From 6a81b5dd90f6b6de33b98812a6a1ef1312d3b8ba Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Thu, 19 Dec 2024 09:16:27 +0000 Subject: [PATCH 5/7] Revert bad parsing Signed-off-by: Afonso Oliveira --- .../__pycache__/parsing.cpython-310.pyc | Bin 9422 -> 9422 bytes .../test.cpython-310-pytest-8.3.4.pyc | Bin 1944 -> 1777 bytes ext/auto-inst/test.py | 16 ++++++---------- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc index 09227159071d37a12bdaf3dbfea20c3722291076..6e1d3e4c34d9c9a2038f2ebd613ceb4a4676afeb 100644 GIT binary patch delta 19 ZcmX@-dCrq7pO=@50SGQW-pF-C1pqwz1|t9f delta 19 ZcmX@-dCrq7pO=@50SFFV+sJi91pqu?1_uBD diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc index bf345f660203a51a0b63b051ab24a0a32a775131..82b12de2706eb3018c923f41d81b8a5588cb79b4 100644 GIT binary patch delta 145 zcmbQi|B;t3pO=@50SGvsB&R#GZ{+h~W;C4~&1^b(6|)MX(&Y2ZN|U)*>=|`|V)BzS zS@ Date: Thu, 19 Dec 2024 09:22:35 +0000 Subject: [PATCH 6/7] Allow only one value Signed-off-by: Afonso Oliveira --- .../__pycache__/parsing.cpython-310.pyc | Bin 9422 -> 9483 bytes ext/auto-inst/parsing.py | 12 ++++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc index 6e1d3e4c34d9c9a2038f2ebd613ceb4a4676afeb..e9334dd9950c43dcaadf78534695e91af9c81202 100644 GIT binary patch delta 819 zcmX9+-Afcv6rVFQJ5IB^GZu=gZn&#!utgc9VM=E9fgdQ64?dI(>V7!2I@_IbwqTcn z70Lo3Zbk&5z6ACnGSOpyLGcc7&RcgR0;OthFj^i#gbVTVf4rqAOLQ4v1nM z3gV1_3^Qs{6@SvR;uPz_ow9NuIW7w#2v!+Ixn#M4WUaX9X(-u?rsE!EfxKiDmjbC| zR|3hKFY#Ym#)x5hOjJRKMo=ZyPegkGJbyBF0nQAek5LQVAsMc5#6 z&3_kf1L)_U_2Wt(SQ0;|E4MO$f5w1S)9QtPU{{ABz9v2;1LqV}p=UUfrD00q`S33dK4J&WySI=evQqn&fG%inZ<86HIEPmv;C zAY_4UlTZ!|RFDVqJS%$43u*qI<+5Fhkk}Rqw$9&or6xuJ$8rpHJsgTAYM_U2K|Wx6 zl&G5K^1QWTnpl9|E{d>R6KkbiUJ*a+of#V-Nsm3N>N%%Vij!!4Hy)w2Z_{Q2_{AnnzoiwE#2nr7L2wr zQkA6QX*BWT4^AXR63E`Yc_ni2vKLJ_c`(MmK;LXHZt_0wJM-qv&U^D-PVQV*b-!P6 z@o#spP#PTGR-Y-;A~$JF@nIa~Xt+MQ?!g+|6`If#OlUw0IE5C>2~)15aE+ySR8p|r zMy*mdZLwT6lcL>K(F<9_WVSS4F3sCwMPI_WH=#&8ScL#Q8p7{d1h;&O)$sZOs#e=~ z22PCeWkxl1aF$YoE#&hZ+e<4KI9`yrX#Md=08XP%>ywAca|%=1m^@DMJl3?VoF;h* zziDZZ@Ias>=g2W)wE{=o@=ekcmKEH9>JDGNZ_P$$yNH!**x@ zMsO=M19wmjkHQs2hF;Cil`BQIg1=)u@D#h_w|L39_+b$6d3*-eaWDS$$taKiDbV)Uv=i8Sn&eKh zI@h+8W2HrAIEtf7zzXg+qy8$ocbr) Date: Thu, 19 Dec 2024 17:32:17 +0000 Subject: [PATCH 7/7] Use AsmString instead of name Signed-off-by: Afonso Oliveira --- .../__pycache__/parsing.cpython-310.pyc | Bin 9483 -> 9704 bytes ext/auto-inst/parsing.py | 46 ++++++++++++------ 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc index e9334dd9950c43dcaadf78534695e91af9c81202..75bd77c41bc885711b1bbf6a2aef74f4c400a15d 100644 GIT binary patch delta 832 zcmZuv&ubGw6rP#gt=nl5L{e;}%A{$b0rA(tLj@r!9;%I)6s6D-rrAxpw!3R)HcDYb z4n;h891mWUqE4(zw@y?yWP_q{hCHhw+H#n~)Ha1LL3 zt)06kId!Gyfcp?0=>epuj|QlZ2dIlu+$RI{2<@)!kS^(3iIrN3eN1G&O%o@zJnWHU zYm#rLwk))1YWHUb$g>tv*GZie9T0IdB?MtqLv8BWHB@r=dyEiea2^M}$YY#^JZbC) zQHuqUOhl)V1aSm5dnMzj?(u}Py(89$!`EEsEZ*Wek(FgwOHAYqKkWGOhG|;$la7ca zOZNR~;{*SQ1x?nBJCS#tRpQ8JF(ioI+d%r`I#XO0ExB$i&Ck%OM?Cb|8W1b3vn^=) zH%arYDdLcAmp7kI3a7-^yr7Ys7jD#^RhW^}h@%8F1X1r_&COWAG(i8Pmod&oO{oDL z77L?f%xW@2E!8lZlK7u;Eu69*s1z+&y|6UEVo;@YLCkCCwuY((TZ6tSBBS9BKxsx4 z;5C|^{6)Chcg!kAl*_OV9ARCf>>ihXT?h2Egua1em_tb2TbjSN0Fg}gqD3*mdCLaO zI4*H9IkPRHPQ}ts3qLsBW}6r*H7tC`3+mnSE*`2M%Om$A6qm60qP`Ui?od0H_^f^x zSFgSRHS!Wfa@fTbQ$leLy5_KbCcdg$E0?pTHZK+-M|@ISE7durP2B}DPF9}J7Tx=9 HhC;<3kxt@C delta 584 zcmY*V&ubGw6yBNL4V$&ui^k%Q!n8>Yfl??G4?PH?2T`hM&m~NEC&_5C8{bYgZQ0O^ z;6d;*#Df=;V^2luAK=~pV8Me2#j95@^-U~-58jXOd*8hGn4gc{JPeXb#kbI2e-FF2 zwmt^>`f(ZIJ@j-RLvGEixiz6Vosoz{bWSIBPIDW1Iyc=5_5#`eafo`QiIY3`tv!F@ z=59_h3pR3>TEyD5Ue+Uf*J?ROpL~XPS1Bw=WAg8+0SgtH4T!Cxm>3hA|cy?)ON~4EU{kHM6T=njF JOFnVV{{T2GrN00G diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py index a29e76cce..b272e3580 100644 --- a/ext/auto-inst/parsing.py +++ b/ext/auto-inst/parsing.py @@ -244,23 +244,37 @@ def get_repo_instructions(repo_directory): def find_json_key(instr_name, json_data): """ - Attempt to find a matching key in json_data for instr_name, considering different - naming conventions: replacing '.' with '_', and trying various case transformations. + Find a matching instruction in json_data by comparing against AsmString values. + Returns the matching key if found, None otherwise. + + Args: + instr_name (str): The instruction name from YAML + json_data (dict): The JSON data containing instruction information + + Returns: + str or None: The matching key from json_data if found, None otherwise """ - lower_name = instr_name.lower() - lower_name_underscore = lower_name.replace('.', '_') - variants = { - lower_name, - lower_name_underscore, - instr_name.upper(), - instr_name.replace('.', '_').upper(), - instr_name.capitalize(), - instr_name.replace('.', '_').capitalize() - } - - for v in variants: - if v in json_data: - return v + # First, normalize the instruction name for comparison + instr_name = instr_name.lower().strip() + + # Search through all entries in json_data + for key, value in json_data.items(): + if not isinstance(value, dict): + continue + + # Get the AsmString value and normalize it + asm_string = safe_get(value, 'AsmString', '').lower().strip() + if not asm_string: + continue + + # Extract the base instruction name from AsmString + # AsmString might be in format like "add $rd, $rs1, $rs2" + # We want just "add" + base_asm_name = asm_string.split()[0] + + if base_asm_name == instr_name: + return key + return None def run_parser(json_file, repo_directory, output_file="output.txt"):