"""Summarize RISC-V instruction records from a JSON dump and check encodings.

For every instruction YAML file found under a repository directory, this
script looks up the matching record in a JSON file, writes a human-readable
summary of that record to ``output.txt`` and compares the record's ``Inst``
bit layout with the YAML ``encoding`` section (``match`` pattern plus
``variables`` locations).

Originally added as ``ext/auto-inst/parsing.py``.
"""

import json
import os
import re
import sys
from collections import defaultdict  # noqa: F401  (kept from the original file)

try:
    import yaml
except ImportError:  # The comparison helpers below do not need PyYAML.
    yaml = None

# Lower-cased instruction name -> top-level category directory (set by main()).
REPO_INSTRUCTIONS = {}
# Root directory that was scanned for YAML files (set by main()).
REPO_DIRECTORY = None

# Per-root cache: lower-cased instruction name -> full path of its YAML file.
_YAML_PATH_INDEX = {}

# One token per encoded bit: a literal 0/1, or a field reference such as
# "rs1[4]". "_" and "?" are accepted so that names like "imm_lo[3]" and the
# "?[?]" placeholders written by _render_inst_bits() stay a single token.
_ENCODING_TOKEN_RE = re.compile(r"[01]|[A-Za-z0-9_?]+(?:\[(?:\d+|\?)\])?")
# A token that names a real field with a numeric bit index.
_INDEXED_FIELD_RE = re.compile(r"([A-Za-z0-9_]+)\[\d+\]")


def safe_get(data, key, default=""):
    """Return ``data[key]`` if ``data`` is a dict containing ``key``.

    Any non-dict ``data`` (including ``None``) and an unhashable ``key``
    yield ``default`` instead of raising.
    """
    if not isinstance(data, dict):
        return default
    try:
        return data.get(key, default)
    except TypeError:  # unhashable key
        return default


def _iter_yaml_files(repo_directory):
    """Yield ``(directory, filename)`` for every ``*.yaml`` file under the root."""
    for root, _, files in os.walk(repo_directory):
        for filename in files:
            if filename.endswith(".yaml"):
                yield root, filename


def _find_yaml_file(name, category):
    """Return the path of ``name``'s YAML file under REPO_DIRECTORY, or None.

    The direct ``<root>/<category>/<name>.yaml`` location is tried first.
    Because get_repo_instructions() records only the top-level directory,
    files nested deeper (or sitting in the root, category "Other") are not
    at that location; those are resolved through a recursive index that is
    built once per root directory.
    """
    direct = os.path.join(REPO_DIRECTORY, category, name + ".yaml")
    if os.path.isfile(direct):
        return direct

    index = _YAML_PATH_INDEX.get(REPO_DIRECTORY)
    if index is None:
        # Last match wins, mirroring get_repo_instructions().
        index = {
            os.path.splitext(filename)[0].lower(): os.path.join(root, filename)
            for root, filename in _iter_yaml_files(REPO_DIRECTORY)
        }
        _YAML_PATH_INDEX[REPO_DIRECTORY] = index
    return index.get(name)


def load_yaml_encoding(instr_name):
    """Load the YAML encoding that corresponds to a JSON instruction name.

    The name is lower-cased and looked up in REPO_INSTRUCTIONS both as-is and
    with ``_`` replaced by ``.``.

    Returns:
        ``(match, variables)`` taken from the YAML ``encoding`` mapping, or
        ``(None, None)`` when no YAML file can be located. ``match`` is
        ``None`` and ``variables`` is ``[]`` when the respective key is
        missing.

    Raises:
        RuntimeError: a YAML file was found but PyYAML is not installed.
    """
    if not REPO_DIRECTORY:
        return None, None

    lower_name = instr_name.lower()
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    candidates = dict.fromkeys((lower_name, lower_name.replace("_", ".")))

    yaml_file_path = None
    for cand in candidates:
        if cand not in REPO_INSTRUCTIONS:
            continue
        yaml_file_path = _find_yaml_file(cand, REPO_INSTRUCTIONS[cand])
        if yaml_file_path is not None:
            break

    if yaml_file_path is None:
        return None, None

    if yaml is None:
        raise RuntimeError("PyYAML is required to read instruction YAML files.")

    with open(yaml_file_path, "r", encoding="utf-8") as yf:
        ydata = yaml.safe_load(yf)

    encoding = safe_get(ydata, "encoding", {})
    yaml_match = safe_get(encoding, "match", None)
    yaml_vars = safe_get(encoding, "variables", [])
    return yaml_match, yaml_vars


def _parse_location(location):
    """Parse a YAML variable location into a list of ``(high, low)`` ranges.

    Accepts a single range ("24-20"), a single bit ("25" or the int 25) and
    ``|``-separated combinations of both ("31|7|30-25|11-8").

    Raises:
        ValueError: a part is not an integer or an integer pair.
    """
    ranges = []
    for part in str(location).split("|"):
        high_text, dash, low_text = part.strip().partition("-")
        high = int(high_text)
        low = int(low_text) if dash else high
        ranges.append((high, low))
    return ranges


def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
    """Compare a YAML encoding with the JSON bit string of an instruction.

    Args:
        yaml_match: 32-character pattern of ``0``/``1`` and ``-`` (variable
            bit), most significant bit first.
        yaml_vars: iterable of mappings with ``name`` and ``location`` keys.
        json_encoding_str: concatenated bit tokens, most significant bit
            first, e.g. ``"0000000rs2[4]...0110011"``.

    Returns:
        A list of human-readable difference messages; empty when the two
        encodings agree.
    """
    if not yaml_match:
        return ["No YAML match field available for comparison."]
    if not json_encoding_str:
        return ["No JSON encoding available for comparison."]

    yaml_pattern_str = str(yaml_match).replace("-", ".")
    if len(yaml_pattern_str) != 32:
        return [
            f"YAML match pattern length is {len(yaml_pattern_str)}, "
            "expected 32. Cannot compare properly."
        ]

    tokens = _ENCODING_TOKEN_RE.findall(json_encoding_str)
    if len(tokens) != 32:
        # Same figure the original countdown produced: 32 - number of tokens.
        return [
            "JSON encoding does not appear to be 32 bits. "
            f"Ends at bit {32 - len(tokens)}."
        ]
    # The first token is bit 31, the last one bit 0.
    json_bits = {31 - offset: token for offset, token in enumerate(tokens)}

    differences = []

    # Fixed bits: every 0/1 in the YAML pattern must be the same literal in
    # JSON, and every variable position must not be a literal.
    for b in range(32):
        yaml_bit = yaml_pattern_str[31 - b]
        json_bit_str = json_bits[b]
        json_is_fixed = json_bit_str in ("0", "1")

        if yaml_bit in ("0", "1"):
            if not json_is_fixed:
                differences.append(
                    f"Bit {b}: YAML expects fixed bit '{yaml_bit}' "
                    f"but JSON has '{json_bit_str}'"
                )
            elif json_bit_str != yaml_bit:
                differences.append(
                    f"Bit {b}: YAML expects '{yaml_bit}' "
                    f"but JSON has '{json_bit_str}'"
                )
        elif json_is_fixed:
            differences.append(
                f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'"
            )

    # Variable fields: the bits of one YAML variable must all belong to
    # exactly one JSON field.
    for var in yaml_vars or []:
        try:
            var_name = var["name"]
            ranges = _parse_location(var["location"])
        except (KeyError, TypeError, ValueError):
            differences.append(f"Variable entry {var!r}: Unable to parse name/location")
            continue

        span = "|".join(f"{high}-{low}" for high, low in ranges)
        field_names = set()
        for high, low in ranges:
            for bb in range(low, high + 1):
                # Positions outside 0..31 simply have no token.
                found = _INDEXED_FIELD_RE.fullmatch(json_bits.get(bb, "?"))
                if found:
                    field_names.add(found.group(1))

        if not field_names:
            differences.append(
                f"Variable {var_name}: No corresponding field found in JSON bits {span}"
            )
        elif len(field_names) > 1:
            # Sorted so the message does not depend on set iteration order.
            differences.append(
                f"Variable {var_name}: Multiple fields {sorted(field_names)} "
                f"found in JSON for bits {span}"
            )

    return differences


def _printable(value):
    """Return ``value['printable']`` for dict values, otherwise ``'N/A'``."""
    if isinstance(value, dict):
        return value.get("printable", "N/A")
    return "N/A"


def _render_inst_bits(data):
    """Render the record's ``Inst`` list as one string of bit tokens.

    Dict entries become ``var[index]`` (``?`` for a missing part), anything
    else is stringified. The list is reversed before joining so that the
    first token is the one compare_yaml_json_encoding() treats as bit 31.

    Raises:
        TypeError: ``Inst`` is present but not iterable.
    """
    rendered = [
        f"{bit.get('var', '?')}[{bit.get('index', '?')}]"
        if isinstance(bit, dict)
        else str(bit)
        for bit in safe_get(data, "Inst", [])
    ]
    rendered.reverse()
    return "".join(rendered)


def safe_print_instruction_details(name: str, data: dict, output_stream):
    """Write a formatted report for one instruction and its encoding check.

    Args:
        name: JSON key of the instruction; also used to locate the YAML file.
        data: the JSON record of the instruction.
        output_stream: object with a ``write(str)`` method.

    Encoding differences are additionally echoed to stdout. Any unexpected
    error is written to ``output_stream`` rather than raised, so that one bad
    record does not stop the whole run.
    """
    write = output_stream.write
    rule = "-" * 20 + "\n"
    try:
        write(f"\n{name} Instruction Details\n")
        write("=" * 50 + "\n")

        # Basic information
        write("\nBasic Information:\n")
        write(rule)
        write(f"Name: {name}\n")
        write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n")
        write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n")

        # Location: first entry of '!locs' when it is a non-empty list
        locs = safe_get(data, "!locs", [])
        loc = locs[0] if isinstance(locs, list) and locs else "N/A"
        write(f"Location: {loc}\n")

        # Operands
        write("\nOperands:\n")
        write(rule)
        write(f"Inputs: {_printable(safe_get(data, 'InOperandList', {}))}\n")
        write(f"Outputs: {_printable(safe_get(data, 'OutOperandList', {}))}\n")

        # Instruction properties
        write("\nInstruction Properties:\n")
        write(rule)
        for label, key in (
            ("Commutable", "isCommutable"),
            ("Memory Load", "mayLoad"),
            ("Memory Store", "mayStore"),
            ("Side Effects", "hasSideEffects"),
        ):
            write(f"{label}: {'Yes' if safe_get(data, key, 0) else 'No'}\n")

        # Scheduling information (section omitted when empty)
        sched = safe_get(data, "SchedRW", [])
        if sched:
            write("\nScheduling Information:\n")
            write(rule)
            write("Operations:\n")
            try:
                for op in sched:
                    if isinstance(op, dict):
                        write(f" - {op.get('printable', 'N/A')}\n")
            except TypeError:  # 'SchedRW' is not iterable
                write(" - Unable to parse scheduling information\n")

        # Encoding
        write("\nEncoding Pattern:\n")
        write(rule)
        try:
            encoding = _render_inst_bits(data)
        except TypeError:  # 'Inst' is not iterable
            write("Binary Format: Unable to parse encoding\n")
            encoding = ""
        else:
            write(f"Binary Format: {encoding}\n")

        # YAML vs JSON comparison
        yaml_match, yaml_vars = load_yaml_encoding(name)
        if yaml_match is not None and encoding:
            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
            if differences:
                write("\nDifferences in encoding:\n")
                for d in differences:
                    write(f" - {d}\n")
                    print(f"Difference in {name}: {d}", file=sys.stdout)
            else:
                write("\nNo encoding differences found.\n")
        else:
            # Explain which side of the comparison is missing.
            if yaml_match is None:
                write("\nNo YAML encoding match found for comparison.\n")
            if not encoding:
                write("\nNo JSON encoding found for comparison.\n")

        write("\n")
    except Exception as e:  # deliberate per-instruction boundary
        write(f"Error processing instruction {name}: {str(e)}\n")
        write("Continuing with next instruction...\n\n")


def get_repo_instructions(repo_directory):
    """Map every YAML instruction name under ``repo_directory`` to a category.

    The directory is walked recursively. The key is the file name without
    extension, lower-cased; the value is the first path component below
    ``repo_directory`` or ``"Other"`` for files directly in the root. When a
    name occurs more than once, the last file visited wins.
    """
    repo_instructions = {}
    for root, filename in _iter_yaml_files(repo_directory):
        rel_path = os.path.relpath(root, repo_directory)
        category = "Other" if rel_path == "." else rel_path.split(os.sep)[0]
        repo_instructions[os.path.splitext(filename)[0].lower()] = category
    return repo_instructions


def find_json_key(instr_name, json_data):
    """Return the key of ``json_data`` that matches ``instr_name``, or None.

    The spellings are tried in a fixed order: lower case, lower case with
    ``.`` replaced by ``_``, upper case, upper case with ``_``, capitalized,
    capitalized with ``_``. The first spelling present in ``json_data`` is
    returned, so the result is deterministic when several exist.
    """
    underscored = instr_name.replace(".", "_")
    variants = dict.fromkeys(
        (
            instr_name.lower(),
            underscored.lower(),
            instr_name.upper(),
            underscored.upper(),
            instr_name.capitalize(),
            underscored.capitalize(),
        )
    )
    for variant in variants:
        if variant in json_data:
            return variant
    return None


def main():
    """Command-line entry point: ``<script> <json_file> <repo_directory>``.

    Writes the report to ``output.txt`` in the current directory and exits
    with status 1 on usage errors, a missing PyYAML, an empty repository or
    an unreadable JSON file.
    """
    global REPO_INSTRUCTIONS, REPO_DIRECTORY

    if len(sys.argv) != 3:
        print(f"Usage: python {sys.argv[0]} <json_file> <repo_directory>")
        sys.exit(1)

    if yaml is None:
        print("PyYAML is required (pip install pyyaml).")
        sys.exit(1)

    json_file = sys.argv[1]
    REPO_DIRECTORY = sys.argv[2]

    # Instruction names and categories come from the repository layout.
    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
    if not REPO_INSTRUCTIONS:
        print("No instructions found in the provided repository directory.")
        sys.exit(1)

    try:
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading file: {str(e)}")
        sys.exit(1)

    if not isinstance(data, dict):
        print("Error reading file: top-level JSON value is not an object")
        sys.exit(1)

    all_instructions = []

    # For each YAML instruction, try to find it in the JSON data.
    for yaml_instr_name in REPO_INSTRUCTIONS:
        json_key = find_json_key(yaml_instr_name, data)
        if json_key is None:
            print(
                f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) "
                "not found in JSON, skipping...",
                file=sys.stderr,
            )
            continue

        instr_data = data.get(json_key)
        if not isinstance(instr_data, dict):
            print(
                f"DEBUG: Instruction '{yaml_instr_name}' is in JSON "
                "but not a valid dict, skipping...",
                file=sys.stderr,
            )
            continue

        all_instructions.append((json_key, instr_data))

    # Case-insensitive ordering by JSON key.
    all_instructions.sort(key=lambda item: item[0].lower())

    with open("output.txt", "w", encoding="utf-8") as outfile:
        outfile.write("RISC-V Instruction Summary\n")
        outfile.write("=" * 50 + "\n")
        outfile.write(f"\nTotal Instructions Found: {len(all_instructions)}\n")
        for name, _ in all_instructions:
            outfile.write(f" - {name}\n")

        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
        outfile.write("=" * 80 + "\n")

        # Details are written per instruction, without category grouping.
        for name, instr_data in all_instructions:
            safe_print_instruction_details(name, instr_data, outfile)

    print("Output has been written to output.txt")


if __name__ == '__main__':
    main()