From d9b50b269f0e26fc3f13466edb1c4e7ad8e3eb02 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Fri, 13 Dec 2024 14:12:57 +0000
Subject: [PATCH 1/7] Add simple Docker environment variable

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py | 328 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 ext/auto-inst/parsing.py

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
new file mode 100644
index 000000000..be1787874
--- /dev/null
+++ b/ext/auto-inst/parsing.py
@@ -0,0 +1,328 @@
+import os
+import json
+import re
+import sys
+from collections import defaultdict
+import yaml  
+
+REPO_INSTRUCTIONS = {} 
+REPO_DIRECTORY = None
+
+def safe_get(data, key, default=""):
+    """Safely get a value from a dictionary, return default if not found or error."""
+    try:
+        if isinstance(data, dict):
+            return data.get(key, default)
+        return default
+    except:
+        return default
+
+def load_yaml_encoding(instr_name):
+    """
+    Given an instruction name (from JSON), find the corresponding YAML file and load its encoding data.
+    We'll try to match the instr_name to a YAML file by using REPO_INSTRUCTIONS and transformations.
+    """
+    candidates = set()
+    lower_name = instr_name.lower()
+    candidates.add(lower_name)
+    candidates.add(lower_name.replace('_', '.'))
+
+    yaml_file_path = None
+    yaml_category = None
+    for cand in candidates:
+        if cand in REPO_INSTRUCTIONS:
+            yaml_category = REPO_INSTRUCTIONS[cand]
+            yaml_file_path = os.path.join(REPO_DIRECTORY, yaml_category, cand + ".yaml")
+            if os.path.isfile(yaml_file_path):
+                break
+            else:
+                yaml_file_path = None
+
+    if not yaml_file_path or not os.path.isfile(yaml_file_path):
+        # YAML not found
+        return None, None
+
+    # Load the YAML file
+    with open(yaml_file_path, 'r') as yf:
+        ydata = yaml.safe_load(yf)
+
+    encoding = safe_get(ydata, 'encoding', {})
+    yaml_match = safe_get(encoding, 'match', None)
+    yaml_vars = safe_get(encoding, 'variables', [])
+
+    return yaml_match, yaml_vars
+
+def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
+    """
+    Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
+    Return a list of differences.
+    """
+    if not yaml_match:
+        return ["No YAML match field available for comparison."]
+    if not json_encoding_str:
+        return ["No JSON encoding available for comparison."]
+
+    yaml_pattern_str = yaml_match.replace('-', '.')
+    if len(yaml_pattern_str) != 32:
+        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]
+
+    def parse_location(loc_str):
+        high, low = loc_str.split('-')
+        return int(high), int(low)
+
+    yaml_var_positions = {}
+    for var in yaml_vars:
+        high, low = parse_location(var["location"])
+        yaml_var_positions[var["name"]] = (high, low)
+
+    # Tokenize JSON encoding
+    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)
+    json_bits = []
+    bit_index = 31
+    for t in tokens:
+        json_bits.append((bit_index, t))
+        bit_index -= 1
+
+    if bit_index != -1:
+        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]
+
+    differences = []
+
+    # Check fixed bits
+    for b in range(32):
+        yaml_bit = yaml_pattern_str[31 - b]
+        token = [tt for (pos, tt) in json_bits if pos == b]
+        if not token:
+            differences.append(f"Bit {b}: No corresponding JSON bit found.")
+            continue
+        json_bit_str = token[0]
+
+        if yaml_bit in ['0', '1']:
+            if json_bit_str not in ['0', '1']:
+                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
+            elif json_bit_str != yaml_bit:
+                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
+        else:
+            # Variable bit in YAML
+            if json_bit_str in ['0', '1']:
+                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
+
+    # Check variable fields
+    for var_name, (high, low) in yaml_var_positions.items():
+        json_var_fields = []
+        for bb in range(low, high+1):
+            token = [tt for (pos, tt) in json_bits if pos == bb]
+            if token:
+                json_var_fields.append(token[0])
+            else:
+                json_var_fields.append('?')
+
+        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
+        if len(field_names) == 0:
+            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
+        elif len(field_names) > 1:
+            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")
+
+    return differences
+
+def safe_print_instruction_details(name: str, data: dict, output_stream):
+    """Print formatted instruction details and compare YAML/JSON encodings."""
+    try:
+        # Print the instruction details without separating by category
+        output_stream.write(f"\n{name} Instruction Details\n")
+        output_stream.write("=" * 50 + "\n")
+
+        # Basic Information
+        output_stream.write("\nBasic Information:\n")
+        output_stream.write("-" * 20 + "\n")
+        output_stream.write(f"Name:              {name}\n")
+        output_stream.write(f"Assembly Format:   {safe_get(data, 'AsmString', 'N/A')}\n")
+        output_stream.write(f"Size:              {safe_get(data, 'Size', 'N/A')} bytes\n")
+
+        # Location
+        locs = safe_get(data, '!locs', [])
+        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
+        output_stream.write(f"Location:          {loc}\n")
+
+        # Operands
+        output_stream.write("\nOperands:\n")
+        output_stream.write("-" * 20 + "\n")
+        try:
+            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')
+            output_stream.write(f"Inputs:            {in_ops}\n")
+        except:
+            output_stream.write("Inputs:            N/A\n")
+
+        try:
+            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')
+            output_stream.write(f"Outputs:           {out_ops}\n")
+        except:
+            output_stream.write("Outputs:           N/A\n")
+
+        # Instruction Properties
+        output_stream.write("\nInstruction Properties:\n")
+        output_stream.write("-" * 20 + "\n")
+        output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
+        output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
+        output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
+        output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
+
+        # Scheduling Info
+        sched = safe_get(data, 'SchedRW', [])
+        if sched:
+            output_stream.write("\nScheduling Information:\n")
+            output_stream.write("-" * 20 + "\n")
+            output_stream.write("Operations:\n")
+            try:
+                for op in sched:
+                    if isinstance(op, dict):
+                        output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
+            except:
+                output_stream.write("  - Unable to parse scheduling information\n")
+
+        # Encoding
+        output_stream.write("\nEncoding Pattern:\n")
+        output_stream.write("-" * 20 + "\n")
+        encoding_bits = []
+        try:
+            inst = safe_get(data, 'Inst', [])
+            for bit in inst:
+                if isinstance(bit, dict):
+                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
+                else:
+                    encoding_bits.append(str(bit))
+            # Reverse the bit order before joining
+            encoding_bits.reverse()
+            encoding = "".join(encoding_bits)
+            output_stream.write(f"Binary Format:     {encoding}\n")
+        except:
+            output_stream.write("Binary Format:     Unable to parse encoding\n")
+            encoding = ""
+
+        # Now compare YAML vs JSON encodings
+        yaml_match, yaml_vars = load_yaml_encoding(name)
+        if yaml_match is not None and encoding:
+            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
+            if differences:
+                output_stream.write("\nDifferences in encoding:\n")
+                for d in differences:
+                    output_stream.write(f"  - {d}\n")
+                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
+            else:
+                output_stream.write("\nNo encoding differences found.\n")
+        else:
+            # If we have no YAML match or no encoding, we note that we can't compare
+            if yaml_match is None:
+                output_stream.write("\nNo YAML encoding match found for comparison.\n")
+            if not encoding:
+                output_stream.write("\nNo JSON encoding found for comparison.\n")
+
+        output_stream.write("\n")
+    except Exception as e:
+        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
+        output_stream.write("Continuing with next instruction...\n\n")
+
+def get_repo_instructions(repo_directory):
+    """
+    Recursively find all YAML files in the repository and extract instruction names along with their category.
+    """
+    repo_instructions = {}
+    for root, _, files in os.walk(repo_directory):
+        rel_path = os.path.relpath(root, repo_directory)
+        if rel_path == '.':
+            category = "Other"
+        else:
+            parts = rel_path.split(os.sep)
+            category = parts[0] if parts else "Other"
+
+        for file in files:
+            if file.endswith(".yaml"):
+                instr_name = os.path.splitext(file)[0]
+                # Store lowercase key for easy lookup
+                repo_instructions[instr_name.lower()] = category
+    return repo_instructions
+
+def find_json_key(instr_name, json_data):
+    """
+    Attempt to find a matching key in json_data for instr_name, considering different
+    naming conventions: replacing '.' with '_', and trying various case transformations.
+    """
+    lower_name = instr_name.lower()
+    lower_name_underscore = lower_name.replace('.', '_')
+    variants = {
+        lower_name,
+        lower_name_underscore,
+        instr_name.upper(),
+        instr_name.replace('.', '_').upper(),
+        instr_name.capitalize(),
+        instr_name.replace('.', '_').capitalize()
+    }
+
+    for v in variants:
+        if v in json_data:
+            return v
+    return None
+
+def main():
+    global REPO_INSTRUCTIONS, REPO_DIRECTORY
+
+    if len(sys.argv) != 3:
+        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
+        sys.exit(1)
+
+    json_file = sys.argv[1]
+    REPO_DIRECTORY = sys.argv[2]
+
+    # Get instructions and categories from the repository structure
+    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
+    if not REPO_INSTRUCTIONS:
+        print("No instructions found in the provided repository directory.")
+        sys.exit(1)
+
+    try:
+        # Read and parse JSON
+        with open(json_file, 'r') as f:
+            data = json.loads(f.read())
+    except Exception as e:
+        print(f"Error reading file: {str(e)}")
+        sys.exit(1)
+
+    all_instructions = []
+
+    # For each YAML instruction, try to find it in the JSON data
+    for yaml_instr_name, category in REPO_INSTRUCTIONS.items():
+        json_key = find_json_key(yaml_instr_name, data)
+        if json_key is None:
+            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
+            continue
+
+        instr_data = data.get(json_key)
+        if not isinstance(instr_data, dict):
+            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
+            continue
+
+        # Add this instruction to our list
+        all_instructions.append((json_key, instr_data))
+
+    # Sort all instructions by name
+    all_instructions.sort(key=lambda x: x[0].lower())
+
+    with open("output.txt", "w") as outfile:
+        outfile.write("RISC-V Instruction Summary\n")
+        outfile.write("=" * 50 + "\n")
+        total = len(all_instructions)
+        outfile.write(f"\nTotal Instructions Found: {total}\n")
+        for name, _ in all_instructions:
+            outfile.write(f"  - {name}\n")
+
+        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
+        outfile.write("=" * 80 + "\n")
+
+        # Print details for each instruction directly, no category splitting
+        for name, instr_data in all_instructions:
+            safe_print_instruction_details(name, instr_data, outfile)
+
+    print("Output has been written to output.txt")
+
+if __name__ == '__main__':
+    main()

From 1a14ba8c8535e16a5e628b9f81ae6c0317d54c01 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Wed, 18 Dec 2024 09:01:28 +0000
Subject: [PATCH 2/7] Fix errors due to incorrect parsing of VM

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 ext/auto-inst/parsing.py | 86 +++++++++++++++++++++-------------------
 1 file changed, 46 insertions(+), 40 deletions(-)

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index be1787874..449a7c583 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -55,6 +55,7 @@ def load_yaml_encoding(instr_name):
 def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
     """
     Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
+    If the JSON has a variable like vm[?], it should be treated as just vm.
     Return a list of differences.
     """
     if not yaml_match:
@@ -75,8 +76,7 @@ def parse_location(loc_str):
         high, low = parse_location(var["location"])
         yaml_var_positions[var["name"]] = (high, low)
 
-    # Tokenize JSON encoding
-    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)
+    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
     json_bits = []
     bit_index = 31
     for t in tokens:
@@ -86,6 +86,13 @@ def parse_location(loc_str):
     if bit_index != -1:
         return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]
 
+    normalized_json_bits = []
+    for pos, tt in json_bits:
+        if re.match(r'vm\[[^\]]*\]', tt):
+            tt = 'vm'
+        normalized_json_bits.append((pos, tt))
+    json_bits = normalized_json_bits
+
     differences = []
 
     # Check fixed bits
@@ -103,7 +110,6 @@ def parse_location(loc_str):
             elif json_bit_str != yaml_bit:
                 differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
         else:
-            # Variable bit in YAML
             if json_bit_str in ['0', '1']:
                 differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")
 
@@ -117,7 +123,8 @@ def parse_location(loc_str):
             else:
                 json_var_fields.append('?')
 
-        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
+        # Extract field names from something like varName[index]. After normalizing, vm and others won't have indices.
+        field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
         if len(field_names) == 0:
             differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
         elif len(field_names) > 1:
@@ -128,23 +135,19 @@ def parse_location(loc_str):
 def safe_print_instruction_details(name: str, data: dict, output_stream):
     """Print formatted instruction details and compare YAML/JSON encodings."""
     try:
-        # Print the instruction details without separating by category
         output_stream.write(f"\n{name} Instruction Details\n")
         output_stream.write("=" * 50 + "\n")
 
-        # Basic Information
         output_stream.write("\nBasic Information:\n")
         output_stream.write("-" * 20 + "\n")
         output_stream.write(f"Name:              {name}\n")
         output_stream.write(f"Assembly Format:   {safe_get(data, 'AsmString', 'N/A')}\n")
         output_stream.write(f"Size:              {safe_get(data, 'Size', 'N/A')} bytes\n")
 
-        # Location
         locs = safe_get(data, '!locs', [])
         loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
         output_stream.write(f"Location:          {loc}\n")
 
-        # Operands
         output_stream.write("\nOperands:\n")
         output_stream.write("-" * 20 + "\n")
         try:
@@ -159,26 +162,26 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
         except:
             output_stream.write("Outputs:           N/A\n")
 
-        # Instruction Properties
-        output_stream.write("\nInstruction Properties:\n")
-        output_stream.write("-" * 20 + "\n")
-        output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
-        output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
-        output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
-        output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
-
-        # Scheduling Info
-        sched = safe_get(data, 'SchedRW', [])
-        if sched:
-            output_stream.write("\nScheduling Information:\n")
-            output_stream.write("-" * 20 + "\n")
-            output_stream.write("Operations:\n")
-            try:
-                for op in sched:
-                    if isinstance(op, dict):
-                        output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
-            except:
-                output_stream.write("  - Unable to parse scheduling information\n")
+        # # Instruction Properties
+        # output_stream.write("\nInstruction Properties:\n")
+        # output_stream.write("-" * 20 + "\n")
+        # output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
+        # output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
+        # output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
+        # output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
+
+        # # Scheduling Info
+        # sched = safe_get(data, 'SchedRW', [])
+        # if sched:
+        #     output_stream.write("\nScheduling Information:\n")
+        #     output_stream.write("-" * 20 + "\n")
+        #     output_stream.write("Operations:\n")
+        #     try:
+        #         for op in sched:
+        #             if isinstance(op, dict):
+        #                 output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
+        #     except:
+        #         output_stream.write("  - Unable to parse scheduling information\n")
 
         # Encoding
         output_stream.write("\nEncoding Pattern:\n")
@@ -194,28 +197,31 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
             # Reverse the bit order before joining
             encoding_bits.reverse()
             encoding = "".join(encoding_bits)
-            output_stream.write(f"Binary Format:     {encoding}\n")
+            output_stream.write(f"JSON Encoding:     {encoding}\n")
         except:
-            output_stream.write("Binary Format:     Unable to parse encoding\n")
+            output_stream.write("JSON Encoding:     Unable to parse encoding\n")
             encoding = ""
 
-        # Now compare YAML vs JSON encodings
+        # compare YAML vs JSON encodings
         yaml_match, yaml_vars = load_yaml_encoding(name)
-        if yaml_match is not None and encoding:
+        if yaml_match is not None:
+            output_stream.write(f"YAML Encoding:     {yaml_match}\n")
+        else:
+            output_stream.write("YAML Encoding:     Not found\n")
+
+        if yaml_match and encoding:
+            # Perform comparison
             differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
-            if differences:
-                output_stream.write("\nDifferences in encoding:\n")
+            if differences and len(differences) > 0:
+                output_stream.write("\nEncodings do not match. Differences:\n")
                 for d in differences:
                     output_stream.write(f"  - {d}\n")
                     print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
             else:
-                output_stream.write("\nNo encoding differences found.\n")
+                output_stream.write("\nEncodings Match: No differences found.\n")
         else:
-            # If we have no YAML match or no encoding, we note that we can't compare
-            if yaml_match is None:
-                output_stream.write("\nNo YAML encoding match found for comparison.\n")
-            if not encoding:
-                output_stream.write("\nNo JSON encoding found for comparison.\n")
+            # If we have no YAML match or no JSON encoding, we note that we can't compare
+            output_stream.write("\nComparison: Cannot compare encodings (missing YAML or JSON encoding).\n")
 
         output_stream.write("\n")
     except Exception as e:

From afc753e9a8fc38346d7c87105028db487bc442d3 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 08:06:33 +0000
Subject: [PATCH 3/7] First Refactor to pytest

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../conftest.cpython-310-pytest-8.3.4.pyc     | Bin 0 -> 549 bytes
 .../__pycache__/parsing.cpython-310.pyc       | Bin 0 -> 9093 bytes
 .../test.cpython-310-pytest-8.3.4.pyc         | Bin 0 -> 1994 bytes
 ext/auto-inst/parsing.py                      |  77 +++++++-----------
 ext/auto-inst/test.py                         |  56 +++++++++++++
 ext/riscv-opcodes                             |   2 +-
 6 files changed, 88 insertions(+), 47 deletions(-)
 create mode 100644 ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc
 create mode 100644 ext/auto-inst/__pycache__/parsing.cpython-310.pyc
 create mode 100644 ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
 create mode 100644 ext/auto-inst/test.py

diff --git a/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/conftest.cpython-310-pytest-8.3.4.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b8bc1d08667e8e7a0f516d6da2fb738b7cfd03e
GIT binary patch
literal 549
zcmYjOJx{|h5Vg~!X$zG)z`&LPRmzZNV?tFC8-jw=i7rtZJFQC^N45i`Vq|4w;vevr
zydnnv0u$$yRz2xH-Z}5xIp*DN2Z6nR%;w*azYN$n3&03m_8|~NaE7M1&ep17A&ge`
z)WL{Yr16TTO@b%Lw}0SUG)}Nd|7L0j{1RNALU=(-P%}JYzcug%JAj)TOb{r!<Mlxh
zEG5sQlx0*qQgK1o$9J(>c#3;!LA|TV?btJ?zNMRSq8QJ$y`WjK?gW9L1&;_5>$81|
z=}HDtl?P`usjGzNDrPy$=idFr&2=p)eXQFtA-piM0GdTCBo&`XxNwKxVYuKc8pbKl
zCFdcbf<A<T$>cF8bCxnn0x}EfvkK!<@qp!0g)k>oRH{Ky>0VKpGTMzWze(B<diV&7
zHWa?ATeFg73ds6!R8-LpokSb@AeOL`*>E7}lVFO%(%Y7Hx4jOKH`7F|Twh@?v|yTU
Qov%14Gdip{F|Xf8fBQq3DgXcg

literal 0
HcmV?d00001

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..757e1865563e2916a5b76637ec4bd05e7c6b60ae
GIT binary patch
literal 9093
zcmb7K&2Jn>cJJ!$nd#~I;P6Y|mfEr{YHV_3TSzwM%F@b`Y%8`YuPCpf5ALKn)g)UQ
zP7kYlD6vuFIG_OA2P0k}xx`RF0=YOqZaD<|4<wi5ut_eBAUW(Mx+Z%ff>nO6dWJJY
z%Sd*JuCA`C_p0jEt5?7Gib1JlYxte{=f-OGbxr#<eGLCheEbAYxPydioHeyO>6_hQ
z>RZ2~tG98-z*}#c>zO+lmbS^>$@-d`<IJ`A5pMF#V|FLcvpn}$yJK^U=kYFZ+bi;d
zS6VT7k(VBucgnoX$51oID|{U93O~vx`4ni!`7}R-k_m5;AAX>V1#e<oLtkuBJ=J4R
zF-mQr%8~;2R@_cA;(jAaw1z?Dy&vVXc&^|HYe?d;NRwK`wsldG+Kw)n)OYnywM9*;
zl%WfTd{(=SvMP%|Saess=9aVSMULxiy3LN~tcYM8AJm^8v|O<@>j*FEh?dhEiQ}(0
ztsrt%f=-J&fp9z_0#U0PiS37eD~#M$!%IvxU;~^{VRAQe6XSummE?L{zS9IP*xuXc
z)`E5KoVyaV!XP-uJ>fk#C;YInIoE0VE566)`0_dLQFPAjM8O>KKi75zI$Eu@w-PJF
z4C?5mKtd=Y(RG`ZSr&g56I9S(Ncul&X5mpX`%h#dZCmRyNQ>=iT}TX4({~x9q;fk3
zXB;x55^i)2Jf>t<wWX8YjLbylrY7EsOqn^Vb<HSqKf9BQEQxP!9@cizUX|sgvCG7d
znAReCW8=6q#5;qU@G8$NYLP7sp54~}wA9T;g`J|z?y_#~Q}k+TrnYT>+Im9^Z$HFH
z=<hb3s3dbd=cBf3A#Xtu4dnAGpI@bNTjr&`qGPq~_*b@r-+TV1*K*vJL!uNNwaQM*
zUH6=`YU%%Q@y0@R)>-kfxY3&DGy);KupP8GMDE<a{LzP~YkJ_ooo3*2$B#nCYc&E@
zPYYdB{e0@pG@DK&wwx$%*4?PFrr4=o>#9GLyF(0@x10_wt8??($2aQN7Zz{b{N&25
z>o*n_RbP>CTj5F|zzTX9rky0Lp0L`&UZ%F?t~aat!a~ACV)ciUWK_pVe$y4cyWI4`
zYB@1NFG@1a;GriH!{t2539sFBp|j)P>p9VbFfrS1w3e9n1HYA&Rg<gNZ(h4{>&DI7
zN!AZZ8i^USy;fq<D3ZL=Ns^7yziI&+gJi8{o!AXX&WD`5FtODr((*FV)`?kN^;c5g
z4YXSg#8zSwS)y;PB$+J|vKWW*CbnwSo21HPrlHzdX+rv*Owd^Bs=6<kDU>Ulk<D1l
zW)sZRO>7wYn_@*~;#>c`qMLfrD6$jJC@nHe7q5YC#0VWm0d-;!!#~qC7@#o2*(xlV
z)I*!=DyQv0k-536V@oh;Y%`vDV2D#Zn|^((@!S)GTBf$NYx){C7;UtPZWbFV%k87u
zHoK>Hb6rd3q_v{U3`z?}wF}zDOr&F@7RlIkq4r&ym%0U+2X(1il=hA(3$nP9ZtDUs
z|AE#mY`iNAd<<i7c?G%hVGUz2(=mWUN!p;EM1SS(7`SKnxE$Nn#Ri|~j>!@@mgQK=
zal@0Pt__+>l;xRi8uQXu8a8w3OYMjT!!Z7Zw!)O%w}|UD>rTjt@GoU$$K=yrQoo0u
zz(6o<<4@!SKMaaVInIy3wEUBt#G8p<4z)ch$3bySP9i=2B-f(@1?KXPL+wsHzn$Xy
z??dfg=yA&QIAwNnyBg-eG54ud-k95Tl$DS>$!S|h?eVYJ7YwsHiPLOoR^KpT$!SPO
z%|mhgFZ$-E(tKp8m2&utJ9#<1%XW20;}CS<5HtekAn4Djv7MGvnBPk>ht!bPx7rh!
zLrCb?y|(2CCfdF?)b?e02yI^(qOej5S%L986aRxUN>_q)93GysM9P%)_k(jbHBqnO
z6cl0AdFV%}y&`isFmGp<eL50x_K_W`rbg)cisGkQuDKzMTHo9_P5*)CY_8wEv@{Dl
z>4a-Rr^%gV3_y6;+1x?%`<*aCo0Qkh^r&?~iPB&N=kUr3&O{ukJ(CyDEd;66Oh=3(
z5-i<K*Kd-AqoKiO(=`1sXw~9Z(R_3gKiGDU&51Jh@lyv${DKlt+6gRD<N*RSz1AuO
z<A+W>GwXPd+FpZZasJJkbH!~DDoDHboVI{P6wR&L$ni^7f}TY@;YlPoc!79!X=k&p
zj@4z`3@P+N9a{Q=m_@6qo@Byy(~lCvZxNu#HG@VSCN(kF{MEI@fE_((^vZ-1ygC}H
z1LIxlkw9RXRapFHZ<wi3ybnRu`<hU@KqSrZVr`T`V)a=iCLLe#*|V4C@4o)VlDvC)
z?v6VbzdrZY(rYNYclRED?cS2a`_jFo>ZR(_Q|R_7p^&G~@QdFXT@D_2p#oHHyY0Dx
zj!P)W`8S<qoUAqHT8l%aBKmUT;^z9jyLW$nZ)xe(drOJFx&A%r(iA80{PzhYtB~)K
zoyu0SFEt=dz?)y_8vq0Yd3|RjdhPYZXydHYqX?jrEf1X@_65>K64*tL7{kVEC$zZs
z{}JJUG&bL%37npHppT=NLWzT>rx0aLkfgBU?Dd4b5g(gRl|9w95ZL1p6pmWY#FZIm
zxf7+@O&f1!kD>4Y!Av~UE7{+bM7>W4T~2w<{GcqwJVrE-m-sOakR|%^keI|zQ22=2
zT}HCUF2ygN9afC$b2MX*IdT4HJ!s;@v(J%Dy1P@T?YGjYV))p96Z6hT@C*Dl?Rl~R
zC_B;v<^7gq3smc>E$E=eMgdCEg_q<A-?`0Zk|nKcaWO&F8DOkcFUk0kw;l?@2;w-B
z>S0l({HxSvd}Oz%O;}OTqGuzhM4YFV6MaBZhi3T-wIv1?LA*iC-lQhkD0twt!X&Sl
z(omB;(mDp`JxW-VOtr|;HvQP+_5C*DZDMed7!-!=V`8ZeL7!w4-=x?lQNy&*cZ#*f
zR5^X7l1mEettwOWRMo{L>f{<F3Pc_IVSWD*8ifLhnau(^Wm!=#0#aGJX<D#06Zi(8
z0<<avYFRpgErL@f>Ii5NxI!5oodB3_F$eFHO#j@{k06~gsBQv$E8uj<D5I{-PNT-u
z#m{Mgzeh<9$p|#7pn!a=D|o^R%<eZVu1lQ^t|Zu@r?5knC;*~zCLNn9ryzmKWmS7N
z)ILY1n!wt&0Zzj;wyG)gYKke1Dk~|E#E5ihL<W4+oA8_n3gR#qHvyQ+N3|B?d3abf
z#xZFg;9MCUSv;+{j+6fbfHuMNfPWc9J1H{*+E2AszVG>h?-X&lt@Ws<N2s%k`jE{2
zAobytTD3beIv&8JTzpB5>8P5iB`rXu)9}Z1nUCPxiGP*3ot(^Lw8!}IT^+rgP+u?f
zzD_<diRZ>IsU1AY{5Fop-gkZ%9v?roV@rTO_;n;BN7^GJEey@`MWW=Vy7p}?D##Q(
z7XXI~{B(x_JdG<3FUi7yLo1cu%S0&)Xmd;2lt!SE0MhU$OR|KL1*}{tT{-$voL*5>
zCCvC}x2#Ie$nt=9RN^yAi_TK%6EiB`ALG@pbq1~6INvHLidTt8ZxpX7eo9*@2T%C)
zB0S-!@(tVif@$3`rp3n;)ojWa@*3M_s>gxm&ZV@sLERnuY>arMxrgkbR;|TXKNdg~
zJv>KHX4os{UIbKy(-*F8Z}+Ri=-kKw9@Y|O$J5UBA%FU6N@?$%fBSd-+3}S9jvM+7
zaNPHI=h4I5bAEO_xd1RS?~MH7$;)BrtuG_A^R7awRG;K8hwF<G=DQjj3+FD!lZ!B`
z2PhMh3geu!yafl<jx#R;^@Qq3jwe6tVW}aSxM1G^&Vz))c{?dxZ}q-D1Y}6^Z4xe_
z(s=TEtKEshAxg)I4?m|61GAIzjZQSgD4xXkLGt+Ua|($%p0uy^!x0~+xD`V))$;rl
zrce+ROxRr<b9Bn=u_Zx}G@ewe+9yp%8qYqb@RODT+Q4ZFA?|wz_Ch?VF!KQ(DUcp5
zT<8bT?D)+7a6^X&3JEIEU30Duu>ZWhXFATD6PE{t&{Zea=bi6ZVp7~7K0CzeBZ{~v
zKrwVe_7IjGzh__BcM;|fLU*qxy%0{3AF7aynijysh~mTjiC?%z!9PO#a8Oj!;SChO
zawTX*eyc-a9pzHB5Xl?fW3^h%wyWbw=Ai)SQ1KE$5H>Lhbc*+By(q>hGL(EkH3s2+
z#B4S_;CqEnR8&Hpgh@uJ9NKJ!Nj8jl(199Wd(`mSgg>g4!BLr%PzR+VXyCc)=n#rd
zo<svdomM?)he-}CRHoDyNnJq{ljfKh$g3#K033-=X@HC;%o~D>3O<}vK_N9dpd0j=
zM&9Z@8}kJS!w889cwjRdLCgtu41Y!aG;;K<BQkS{zy;Iwe$F;*00*67GoU)lD!PT1
z)XJuosGR~W;0P!HJ-`XD;<rW9HY$3BonS|p^|}6A@yF<R1pQhl*bgn=!w2FqEJBb}
zYP%HSc=?{uMJxwLwTZ726b#+b$&VASM%sM?5dfHAt|OL_Ro^&RK#AxLVmC}?ce1+_
zw<&Mn3*NaAN6I_D=`}it>~2y>TE!m$PzM3Rko27*k_guko<vkkS!QUIa2q39rQ(^e
z+s&Y_ccAm(7x$uRsjn@@FtOCz$cdxKC+0)9`GCTQJS1%u$EeU!o+o-CrE5}bPE4b`
zY9(bFNuB!wUNFVz#?|-G>LPY1f*=wl;*+608j2?J;zcSbfgOC)%{1V5P+_JK)UJPz
z;jcg#5->N-DYfPmKtX84=lcJc8Sxb=8swk#f3${qJPHCXBSZeS3Uuo`EDddU4Xlkp
z*9E&&+bivT86#!*TFds<+@>{`3{k3@%;<n7i`vG^+`wv@@!zV|yo`{+dOIS2HeJj#
z3QVQ|7Xh@G3WZZJlg{7NDkum(i_^9h`nU_A#(n#h>IhtKDntW>v6W9TPYQwfEfvqy
zW>PydQ%8J)&TRyssRCHv4>}<<Af({Sb3(}~@}X90p=zoXPF<!<2M&^;ux;CqlqXWn
zrt6nFEh-rM5=5I!52CqsT#djy!O!&)ayxvt1=ogc9$e%SNhWn{Ad~+9myrB+D?^)H
z#}=lWj50I#tUQHg^=Cl*-=M*sof@xN^e7s^{G`(RnEzkkNlWJ9MhLeY%+(`3EJwJ}
z*an{B2m!<=f#VQh!>z*@-Bs*lr9t68;7b#DOCPy;I5T|n08D)XSj^gbA4m+BWN`(2
z%_uJs00p{3_=XT0P!|ySXBp}%%@OJXPR=3>Xsa;e@24R~Ak>^Rw-LUTd9HUfu?Bt_
z70k;%F{0xA63-D<+_)jlowCdm0*=O{9aXmV@U#84)`7O!o$>VkgHRx7v-ja8aQm^w
z3tyuIv)6eMvws)i;=;}(FDcAPxiaOVDVarp66hBtW3mWrS?T#eS(v>N#$E!}rP^^>
z8q{w5jVhbi=QgQwL_@(Cgo~qTi4e0a(7n#a-+xcm|28aZ%x+4;S4gEQA<OD?{CBCr
z?4%Y^;ShoAm{q?quWH_?Iq%_8Z`c;4H+y{pifz?1Td6Tb1n*-Cm<>XiWUkZ}N<)Cd
ztWuKm)JF^8FY=s$I;Q(?s5R~l6^u@a;vXaaGgOP{i@Hbb*>(VlO>BHVY~%+cT@d8c
z##1Tiz_G3FoM@o)PCRw>+B=`TH$Q^IXX4qJ-n|w<nCj5QB|0QDGwXy8{B|2Q0#HOe
zAoj2!2nzM;RiKCho9cel=jvi~glOd8q}UZ1U=`q`+ehp#c2pT%tQ*3a)S$}J69j>_
zM+Yr}b)o65FLU?eNwf@!RZ)vih)xT)LWFk);mLkOaHHcoVGrZuH?J>Vnfuv@2o^i*
z>j+uf;*ZcMK5pL%V8w=8gzynyu3$#%Xd&)Xv(xs~Yqu_6|M1#XXXqjh`FC&J{OB@W
z%GvQxe`n9;{~)mA<2Mu}b10a%?BSY{P+^2O&XDRML8eiiapDN&HYlMWGVbOO!KFJr
zm(!68y%2<@L_o<$l#tOB6rxe65I5E-swAj61TD=l5Rq7?LWMjD7gfi4(J4|h@t9Z?
zQf*h4bqQNhkr(k9)l`63hT&b9jH#HEQus9XWc5KvVyW|wF2#HIgLITA&`DiU3=1oj
z0#*VL2Q|eWD8+A39o|A>T9!e9CSXEfJ|Mhj=I3P~%Cf?Frda_Zw15wFg(&G=F)Bt8
zm@!3%MTHk_AjdKen-jk&kC{yT6&N@0?Eehp2#;UE6B0U%aKzk)(dP`eP;?Zi10Z6!
z`B8x-s^tdIplZiW<XDuEIGlKfXTJuPgdx^o_Gbq)9D9hKYHr~vbr`?@N$9R(qqn!B
zwV>sw{}JeKiuRUsK{=YMUMoFLNrV@j3$AFasRbFpw2Ok`bx&z)iC-YW{`r<(*>ug7
zl9;Yo-Bb#enBF7ce3C)c5Pv~LN(ZvI6i4{=;G&r8uHSl-w(bO)LhKMMf%h_^dN{|Q
ziN8j1V%6)^u3ksqG&j7|NTw7F6@N~>T&F}0rqBqQO+ZY#_7-Z`r>IWt0ktK_&J28@
zRO^+$MEaX<7^VL-iTwf49Y(!AG=72&X{gh>upaOZ<VVITJcGp2(z{{uGw7z+)&WRf
Un)ulIz$)3()<vrbKjhi}0&Wp|NdN!<

literal 0
HcmV?d00001

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..deb31b17abb5f4e0bc803f28547204fba3f421a6
GIT binary patch
literal 1994
zcmah~&5ztP6t|O1lF4Lup%M}mRpl0hIzls3gv4Q3t%~S}ieR_e7FCQuqipQi^^%EG
z+u4sObJ|`J;*5}HkNh(nIQq&7i8F^@;5jq<QK)DlJNEN?&wlUs^NUzni4Z)0{g&?j
z86xz%om_rgnB0M;9zjPD#Tgpm=Icyd2VpezMlHNp@kf5M5{v@)I$1bvkJ=b9e?Ouw
z^`4^>JnGOE^`E0rOz~sX4gLVHXtRq=B+GnUBvLV%Vk=xe9(-;444RVA)o35hF~z5N
zhL>Xp#*TLO-MLpc^5ytK-l`qyo;ovE`xJD3LO(kv7$ca$%B7iKyJxc+*G}zDTzksR
zs?+Q3E0K>UJY!YV>q%CKG3By~M5&8X_w|vk-uz1Oe3yLwcx#i~2gM<&t}R5{psu(U
z@|fJ^lBIS8t5>!f<3^c0O7z~4bOYn3BA@VG)81uT6q<|NI6@h(00k2yJE|FY`-11D
zRY;!eu4e+to|Q~#(>_0^3C~VxI*U^_$NvS%RmkIu;b2cp*&vySTnRCtOtS9=lB@Kf
zSLS@e8ST-X0Xx!zq|~CvbEO9wcIX$!rlXiH3&2b7sg_0N3Usj(<JgVyDqeM<;mDD1
zfcPai+aLg%E#UkW1~vM|odJ9R4O*m};_uy=S9=tdNM6;LE}(MeE<k8O^Y&YF0O}O0
zTd?XiK-$$-vl`Sc^-r81;#sS1&BIeP^Pd7eJ$O%CjPme9q}C6?kA1&}MuXZvK=L8@
z3au~NZstkt&)Z-f(1>=PqdK??^qqyE6`$|8((H)oYGa3hDr>-HxdL%jTTegT-h8<E
zU`RFvvB{HVYOz#gBFdZ+AxV-GCM66+nwC;B+9$WZ<Wrs!kx`;j$qP*m6PW|6y44Nv
zvw)ElQqVGGl<XXni+j_rKDoEOy|q1Ds##8gxgxpHB$X`Duil|KL$|uNB%`JHf6NS)
z)jLmwPBQZFaxC|4EJJd;dhnWXd4Bm+{}bkW_wrOwh*%LSnxjt>o$mGF;!GyM(L7~p
z_!6faNVg^7V`%C+bi2T+Z#z&P@Eo1mVu4PvbZY#g^UMK^@C)4QI;I7X@WMn&CsM0J
zuJ=Ib^luw4<2h^Y$f8>VmnK@zxYDVZ78@Vgee9;!QT4IC5#tLC##4*m81NhKu)XAf
z3)x@KM4Du30|sX<&XGC`me7mn76iHR3N9>>`ryp!<rEQsx?sR2AkX>c)fYgUC5MzH
zUE>1`nT9fC3GoCXAVs1SQ)H*qq=^7jEd|RIvJK+1P`Z({flGUhjb9ww=#4i?cqT0Z
z(r@JK2;zi-B?^`s4~R!0%SFbuO|0s+o3bD~)*!Z>y)VX>OqH27cwz!5fSTpnv>F2w
z+2o9y@xNiY`TG371>V*B&_#IF4V@6`QtU#HV<*Db{tmqmhkghZtC6|#T~O_=m~f}$
znTCsN;ycCh_<VMCya>B*GbZhg?n`zkxn`z)F0$o#A&bDcIb$N_+u*s$lL_pf4mjRq
s(*zTKq)W*bd9nLn3e(nV*-$WCpTf1z*oOT-Dx0~RD8w=LAdYzLA9p@FdH?_b

literal 0
HcmV?d00001

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index 449a7c583..d227061d3 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -3,9 +3,9 @@
 import re
 import sys
 from collections import defaultdict
-import yaml  
+import yaml
 
-REPO_INSTRUCTIONS = {} 
+REPO_INSTRUCTIONS = {}
 REPO_DIRECTORY = None
 
 def safe_get(data, key, default=""):
@@ -123,7 +123,7 @@ def parse_location(loc_str):
             else:
                 json_var_fields.append('?')
 
-        # Extract field names from something like varName[index]. After normalizing, vm and others won't have indices.
+        # Extract field names
         field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
         if len(field_names) == 0:
             differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
@@ -162,27 +162,6 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
         except:
             output_stream.write("Outputs:           N/A\n")
 
-        # # Instruction Properties
-        # output_stream.write("\nInstruction Properties:\n")
-        # output_stream.write("-" * 20 + "\n")
-        # output_stream.write(f"Commutable:        {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
-        # output_stream.write(f"Memory Load:       {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
-        # output_stream.write(f"Memory Store:      {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
-        # output_stream.write(f"Side Effects:      {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")
-
-        # # Scheduling Info
-        # sched = safe_get(data, 'SchedRW', [])
-        # if sched:
-        #     output_stream.write("\nScheduling Information:\n")
-        #     output_stream.write("-" * 20 + "\n")
-        #     output_stream.write("Operations:\n")
-        #     try:
-        #         for op in sched:
-        #             if isinstance(op, dict):
-        #                 output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
-        #     except:
-        #         output_stream.write("  - Unable to parse scheduling information\n")
-
         # Encoding
         output_stream.write("\nEncoding Pattern:\n")
         output_stream.write("-" * 20 + "\n")
@@ -234,18 +213,11 @@ def get_repo_instructions(repo_directory):
     """
     repo_instructions = {}
     for root, _, files in os.walk(repo_directory):
-        rel_path = os.path.relpath(root, repo_directory)
-        if rel_path == '.':
-            category = "Other"
-        else:
-            parts = rel_path.split(os.sep)
-            category = parts[0] if parts else "Other"
-
         for file in files:
             if file.endswith(".yaml"):
                 instr_name = os.path.splitext(file)[0]
-                # Store lowercase key for easy lookup
-                repo_instructions[instr_name.lower()] = category
+                relative_path = os.path.relpath(root, repo_directory)
+                repo_instructions[instr_name.lower()] = relative_path
     return repo_instructions
 
 def find_json_key(instr_name, json_data):
@@ -269,21 +241,21 @@ def find_json_key(instr_name, json_data):
             return v
     return None
 
-def main():
+def run_parser(json_file, repo_directory, output_file="output.txt"):
+    """
+    Run the parser logic:
+    1. Get instructions from the repo directory.
+    2. Parse the JSON file and match instructions.
+    3. Write the instruction details to output_file; return its path, or None on failure.
+    """
     global REPO_INSTRUCTIONS, REPO_DIRECTORY
-
-    if len(sys.argv) != 3:
-        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
-        sys.exit(1)
-
-    json_file = sys.argv[1]
-    REPO_DIRECTORY = sys.argv[2]
+    REPO_DIRECTORY = repo_directory
 
     # Get instructions and categories from the repository structure
     REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
     if not REPO_INSTRUCTIONS:
         print("No instructions found in the provided repository directory.")
-        sys.exit(1)
+        return None
 
     try:
         # Read and parse JSON
@@ -291,7 +263,7 @@ def main():
             data = json.loads(f.read())
     except Exception as e:
         print(f"Error reading file: {str(e)}")
-        sys.exit(1)
+        return None
 
     all_instructions = []
 
@@ -313,7 +285,7 @@ def main():
     # Sort all instructions by name
     all_instructions.sort(key=lambda x: x[0].lower())
 
-    with open("output.txt", "w") as outfile:
+    with open(output_file, "w") as outfile:
         outfile.write("RISC-V Instruction Summary\n")
         outfile.write("=" * 50 + "\n")
         total = len(all_instructions)
@@ -324,11 +296,24 @@ def main():
         outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
         outfile.write("=" * 80 + "\n")
 
-        # Print details for each instruction directly, no category splitting
+        # Print details for each instruction directly
         for name, instr_data in all_instructions:
             safe_print_instruction_details(name, instr_data, outfile)
 
-    print("Output has been written to output.txt")
+    print(f"Output has been written to {output_file}")
+    return output_file
+
+def main():
+    if len(sys.argv) != 3:
+        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
+        sys.exit(1)
+
+    json_file = sys.argv[1]
+    repo_directory = sys.argv[2]
+
+    result = run_parser(json_file, repo_directory, output_file="output.txt")
+    if result is None:
+        sys.exit(1)
 
 if __name__ == '__main__':
     main()
diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
new file mode 100644
index 000000000..6f0b7b21a
--- /dev/null
+++ b/ext/auto-inst/test.py
@@ -0,0 +1,56 @@
+import pytest
+import os
+from parsing import run_parser
+
+@pytest.fixture
+def setup_paths(request):
+    json_file = request.config.getoption("--json_file")
+    repo_dir = request.config.getoption("--repo_dir")
+
+    # Resolve absolute paths
+    json_file = os.path.abspath(json_file)
+    repo_dir = os.path.abspath(repo_dir)
+    output_file = os.path.join(repo_dir, "output.txt")
+
+    print(f"Using JSON File: {json_file}")
+    print(f"Using Repository Directory: {repo_dir}")
+    print(f"Output File Path: {output_file}")
+
+    return json_file, repo_dir, output_file
+
+def test_run_parser_mimic_old_behavior(setup_paths):
+    json_file, repo_dir, output_file = setup_paths
+
+    # Run the parser (similar to old behavior)
+    result = run_parser(json_file, repo_dir, output_file=output_file)
+
+    if result is None:
+        print("WARNING: No instructions found or an error occurred. (Mimic old script warning)")
+        # You could fail here if this was previously considered a hard error
+        pytest.fail("No output produced by run_parser.")
+
+    # Check output file content
+    if not os.path.exists(output_file):
+        print("ERROR: output.txt was not created. (Mimic old script error)")
+        pytest.fail("Output file was not created.")
+
+    with open(output_file, 'r') as f:
+        content = f.read()
+
+    # Mimic old behavior: print warnings if no instructions found
+    if "Total Instructions Found: 0" in content:
+        print("WARNING: No instructions found in output.txt (Mimic old script warning)")
+
+    # Check for encoding differences
+    # In the original script, encoding mismatches were printed like:
+    # "Encodings do not match. Differences:"
+    # If we find that line, we mimic the old error messages
+    if "Encodings do not match. Differences:" in content:
+        # Extract differences lines
+        lines = content.splitlines()
+        diff_lines = [line for line in lines if line.strip().startswith("-")]
+        print("ERROR: Encoding differences found! (Mimic old script error)")
+        pytest.fail("Encodings do not match as per old behavior.")
+
+    # If we reach here, we mimic the old success output
+    print("No warnings or errors detected. Test passes but mimics old success behavior.")
diff --git a/ext/riscv-opcodes b/ext/riscv-opcodes
index 5ce8977a5..9226b0d09 160000
--- a/ext/riscv-opcodes
+++ b/ext/riscv-opcodes
@@ -1 +1 @@
-Subproject commit 5ce8977a5961a6bbfc1638e6676e60489665d882
+Subproject commit 9226b0d091b0d2ea9ccad6f7f8ca1283a3b15e88

From 29ec73767e4ceac81fb58ca9793506d88f237354 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 08:19:59 +0000
Subject: [PATCH 4/7] Allow 16 bit instructions for C extension

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9093 -> 9422 bytes
 .../test.cpython-310-pytest-8.3.4.pyc         | Bin 1994 -> 1944 bytes
 ext/auto-inst/parsing.py                      |  30 +++++++++++++-----
 ext/auto-inst/test.py                         |   2 +-
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index 757e1865563e2916a5b76637ec4bd05e7c6b60ae..09227159071d37a12bdaf3dbfea20c3722291076 100644
GIT binary patch
delta 1997
zcmZuyZ)hAv6yLYEx3{;qdw-M5U22ofB{g1?G-#@gwzaL5)*4ELwu-gwxykNb&m?!r
z+$L?$S+4e~Nu@~WC<qeG`K6%rL&<lGBH|ZCD2QAU>c@a0f{6H~zS(Q_1n1b_zL|ON
zH}CyscJ9r?-yR8<bzK#(eewNtVR-CjIOnfPynnx?@vY(G!>976C<qiPehUh4OrXAs
zYKfjDtP7T}0VHNgC9wRiL}48)*$Om7*YAkbUk579_{fl@w0a*1R**`F>YAT3SRqTh
zEVyz}a04|eJ}aD#y9$-5V(Dq24yQyn=&CeG)x1apI2uk1dxV8C$G0w75$eNbLIuIq
zsANS`f~(UIGU#rYYU_Tg({NsF00>mKqT@Kba2%}|rbDc_mqdckbP1SF0Mn^qK1dd!
z8j>V%>8KmSg=DLP#u_5IWOceR8bRVHjkSmumT1J)FedH<s8k1x%Uuzuzm?94kP^~@
zn^+d|U?r`t%L-?%gWEwnDmQ6--EXB<_~Pj`)DVQ)!Zq4qbz?{;O;}r&1o9K@#5a&{
z+MFMv2@KgvI}v-<0z44oFaqnk+veB%|9&3tZ*6|YeI|dS)}jimD-8iFsiOkPe>Omx
z@vlNfrmEFFBdh{RdlY>%iD_hL5E17P`#oU?)`DJQf1oy9s10r*7rM?jc<U8d22^C*
ze-&AU8hMg-p}d|3JbV=vGJM2(_(9JJdOJU8J+iLSt_Czjlrx1#D^&+fOvJ>@w#2gk
zCblNG61wjjosrP@LYwb)nn_{)Lv0~SD}=_63E5%uAP54>C?D5U1KW|jQ7ly)k}H{W
zwo!3R;#7=tMdz$BcrZ6OY&d7_lHm~Bbhz2_U7U7`<&t4mj7Og^rivIaTP_uLczl!S
zMa~r~HimjpMTa|%J=X5%d9tq+H_;Ri<~F1FZEhL}f58X|v&mq8I5ce7=jZKd$F_`W
zH@g@-38U;z@DLnj8TIuLmLP22!&V(&H`D>Knqaq81|?Pq{aC98Uo=V4oSL<bYUnA<
zZQAtmt#%s4icwy4jB?%}W~pEs`7$x8u@S7Hy|NwCoc}@NHwV8_68Mna9Qb|Hf<0(u
zp(##HwfZ6KhsXiMx46$U?5Z}xzSK_Xp{Cl>mP^a}^u2PL^q~vO={*CH2c_m_?NY%x
z+l*c?=Vo*BrsLS8l*8R2kF!;Mt31X@cd`wAxBN6W_p#sgoV=f#huF*E?7k|Rk}87)
zGQ@qFs6h%8QIZshLL4-o1PcDbpyFr>B%cNv`bP1QM9mjvx5B-H9pV6p*pFibvOri&
z`ohqIQ5g^~N+L1&db`-Ya7KxvIIB>7bt4-Y5wj<#iih!pqfvo|s7BSggjbo4W0Q_V
zbv)W(@3>AfH+BGavRkoHSlReBHZID?Ip^Vxk&aJb<aut(9Q+)PA~by{LNh?@C7V=i
zvW1TiaWI*b+y=YY_T*_e$ySo%@E-d-c?QDF=<0z>8~eIu0Ltvs)LyvDHd9;S>P8^_
z8Ne^>tL|;UHh%7Y#fQh{xt_OSnBDGq0jAkNZ=XEHx#!qK??<((+)QzBIP@SirPnIu
zQqw<MHmyoi!eefcb9|C;nB>4XEFv`JieuS?v<l+qsI4lgQYH?W=Yw9+m(a>ann6yV
z%NJ*DvfRx%LU?HB7aeb?33(FW5pS@%@e-DP$LJSDxYl<HuCvp9-|ZjA$xZyD6Ue&=
zSs)+sm)u&xOGz?GVaY3}DcR?X4!O=fd}NIWEwj0d0sGlInJuF?aG<H=a&u;}l*=I%
z{}%A&5qA*&-%8%+Fu`Gw{g4?Q@J@#(gtvRVB;sl4Ny1kmH`q}BMC6Pzq3CL-^0cC<
HAl>^L(dqhD

delta 1616
zcmZuxO>7fK6rMM`Ua!~Q^~R24n}o!63@8#Pln6D2CWWdEr4fh<RHPQ-8per25)zM>
zD5bMC$f-gqRSF%Ds-m=Ug*b4lTvcihJ)$03`ge;|IP}C7ajGiOH|rcysVnW9Z{B|2
zd-LAR?8D<99dqVw+ambv{^R15vHxqQ6sl`#zV)PWjN+?&wOf=BL24leQ*fFHTC*4p
z5ZNG1V|tVNng>E_01M4%ypI{}*&V{d>zZORlSP(^uTK*{B;Yl2KH(ce7Y4Jkqygus
zANI|<2n%zPiV(V^S@JTe?(yh`#$tkEg2)u{EoS>s5ka}_$Aq<^i>QcA(k1{AQRWO2
zKUy6_6vsuBnX7zhf{W1Gi@*rBuuz@EgpMD_=poi2;!Vn{EaBJV!p4w}h_^#lE5i0I
z6uF+kLJhU*^Q%N?%)UV~Buo5`6*37d$vT%!6}AC>LL_Qm2zNtgsT(SB*Se+_SG_J0
zERBk!=wO*e!XJtxj=;a)#?9762TFE`B--xv@RkrISk2Gdh&}($3j#md#@o3SDYO+S
zv=MF+EP`S6o$Z!maVgIbrU+xF>ZC#ObN4E&0M^y}-@4X-n<9n%2`ULfK2>wp*T$Y1
zY*votHzFd{geJv)bm2MZ!c&o+p{O4W7FQ6RSYW>hqt%3YhpZbxguf1Lk)0VG$;Y;l
zi=qq3yS7QpwnVt6$-wTbKgw0}D|s(+-TE8$aR-m=lbk$c^*ej^<|<1Im5W}5<%SN(
z8S5R`D<4>6FedHj2X3EYf-=wXx%!fn(IQ3OcTouyc_Q|wd=$+N@<Yhr>aw)WVMGJ4
zNiSSz&($^IN6;)Oa8!O{?~6p<F7xT~h1rUHY)^nC-*iSGEkAL3lkWTFx!KY}+4Cy=
za;fHVJ|ur}b{a1z@d5eNIc$t5I4XzYCF2zZUzeZ72SzwT-7r8017b7=2{5Uyo8Tgf
zQ62pzIAEbS0Xlw<p~uA+u@?K(?aS*(OrY3BdY9%vFVYJ@uj!Oes5A%Vm|HMiyzy&r
zwbWX0_t8CPgoS%?#*3+&Cai{rH^@fMNlzCxZftw6PRa*~qc9}R<RGlHhLgk8cuPeb
zYb~bkgZqZUhB6^##^wHW4@}4t>0NM8*3#$Uto$uK3@g&gOh8OdXL_ODx{;Xyn3d16
zBYnT4s@91{*E_LD3oCH2JbI1(qlfrv>(q`P03OSD&n{SN9qf5mgBf|V?_=ndJ-Jix
zo}A0&jgrcIN#4qR8C_K{qfAwf=6h0}g7eCpP-aaTF6Z;zuZ|<!)NAwH<8#Uz6jDbt
z5Z{bks?A?6O-|2N__Cr!=dXARSG>Tp36ngm1Ru(u@@KKgVxgZtBl39R9Nd-n3cntC
z5x+jczbfH3(F_2;qXwg$LGARWUglGayc)pD(zM6#Du%{C$AK@)Z~JrO2k_|D5;%8)
zu2|FphpI!MKj-(A&@7ea%F~xiB}|}x<TwQBI(b$_xvk7`Ww^{22aCZe2pSRWlPZ)4
c%?etg^7H%BD~`ucnd7EyrOZ(?W?`~_0Uy?Xod5s;

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
index deb31b17abb5f4e0bc803f28547204fba3f421a6..bf345f660203a51a0b63b051ab24a0a32a775131 100644
GIT binary patch
delta 53
zcmX@bKZBn)pO=@50SG=^OHRMMk=K}&OSUAnxFkL&r!04KGHWa&7bl!Q`7zsb0J#wo
AL;wH)

delta 103
zcmbQie~O<spO=@50SLZbN=|>Xk=K}2*{meBxFo))G%vm&v8Xt;C_XncH#0dtKPM$V
aDK#UpEHl4ovlnYDqp}iVg_Ae3JqG}2swVmX

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index d227061d3..bd5fac67c 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -52,10 +52,14 @@ def load_yaml_encoding(instr_name):
 
     return yaml_match, yaml_vars
 
-def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
+def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str):
     """
     Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
     If the JSON has a variable like vm[?], it should be treated as just vm.
+
+    If instr_name starts with 'C_', then treat the instruction as 16 bits long.
+    Otherwise, treat it as 32 bits long.
+
     Return a list of differences.
     """
     if not yaml_match:
@@ -63,9 +67,12 @@ def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
     if not json_encoding_str:
         return ["No JSON encoding available for comparison."]
 
+    # Determine expected length based on whether it's a compressed instruction (C_)
+    expected_length = 16 if instr_name.startswith('C_') else 32
+
     yaml_pattern_str = yaml_match.replace('-', '.')
-    if len(yaml_pattern_str) != 32:
-        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]
+    if len(yaml_pattern_str) != expected_length:
+        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
 
     def parse_location(loc_str):
         high, low = loc_str.split('-')
@@ -76,16 +83,18 @@ def parse_location(loc_str):
         high, low = parse_location(var["location"])
         yaml_var_positions[var["name"]] = (high, low)
 
+    # Tokenize the JSON encoding string. We assume it should match the expected_length in bits.
     tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
     json_bits = []
-    bit_index = 31
+    bit_index = expected_length - 1
     for t in tokens:
         json_bits.append((bit_index, t))
         bit_index -= 1
 
     if bit_index != -1:
-        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]
+        return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]
 
+    # Normalize JSON bits (handle vm[?] etc.)
     normalized_json_bits = []
     for pos, tt in json_bits:
         if re.match(r'vm\[[^\]]*\]', tt):
@@ -96,8 +105,8 @@ def parse_location(loc_str):
     differences = []
 
     # Check fixed bits
-    for b in range(32):
-        yaml_bit = yaml_pattern_str[31 - b]
+    for b in range(expected_length):
+        yaml_bit = yaml_pattern_str[expected_length - 1 - b]
         token = [tt for (pos, tt) in json_bits if pos == b]
         if not token:
             differences.append(f"Bit {b}: No corresponding JSON bit found.")
@@ -115,6 +124,11 @@ def parse_location(loc_str):
 
     # Check variable fields
     for var_name, (high, low) in yaml_var_positions.items():
+        # Ensure the variable range fits within the expected_length
+        if high >= expected_length or low < 0:
+            differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
+            continue
+
         json_var_fields = []
         for bb in range(low, high+1):
             token = [tt for (pos, tt) in json_bits if pos == bb]
@@ -190,7 +204,7 @@ def safe_print_instruction_details(name: str, data: dict, output_stream):
 
         if yaml_match and encoding:
             # Perform comparison
-            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
+            differences = compare_yaml_json_encoding(name, yaml_match, yaml_vars, encoding)
             if differences and len(differences) > 0:
                 output_stream.write("\nEncodings do not match. Differences:\n")
                 for d in differences:
diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
index 6f0b7b21a..eb0c69191 100644
--- a/ext/auto-inst/test.py
+++ b/ext/auto-inst/test.py
@@ -18,7 +18,7 @@ def setup_paths(request):
 
     return json_file, repo_dir, output_file
 
-def test_run_parser_mimic_old_behavior(setup_paths):
+def test_llvm(setup_paths):
     json_file, repo_dir, output_file = setup_paths
 
     # Run the parser (similar to old behavior)

From 6a81b5dd90f6b6de33b98812a6a1ef1312d3b8ba Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 09:16:27 +0000
Subject: [PATCH 5/7] Revert bad parsing

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9422 -> 9422 bytes
 .../test.cpython-310-pytest-8.3.4.pyc         | Bin 1944 -> 1777 bytes
 ext/auto-inst/test.py                         |  16 ++++++----------
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index 09227159071d37a12bdaf3dbfea20c3722291076..6e1d3e4c34d9c9a2038f2ebd613ceb4a4676afeb 100644
GIT binary patch
delta 19
ZcmX@-dCrq7pO=@50SGQW-pF-C1pqwz1|t9f

delta 19
ZcmX@-dCrq7pO=@50SFFV+sJi91pqu?1_uBD

diff --git a/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc b/ext/auto-inst/__pycache__/test.cpython-310-pytest-8.3.4.pyc
index bf345f660203a51a0b63b051ab24a0a32a775131..82b12de2706eb3018c923f41d81b8a5588cb79b4 100644
GIT binary patch
delta 145
zcmbQi|B;t3pO=@50SGvsB&R#GZ{+h~W;C4~&1^b(6|)MX(&Y2ZN|U)*>=|`|V)BzS
zS@<STWLeB;Fxiw<3n-hwsw7<{?wXgJpOTrEUaXLkuaK8tqL7<dlANJeB|rHTtNdg!
ow&lu9984UHTueO7K$4M%iHlL@KL?P_!OFqJ#|ROd{D5r-0CXuKxc~qF

delta 315
zcmey!JA<DupO=@50SG=^OHRMcv60V*nbCi8G_$FchHqwWX0k$lPKrWta#3bMi9&f|
zQC?<Vy5{5w%qoo5leaP}Nh)BfN-ZkNFVdWRo!Oqz9cYmJWHS~%e1^HOEM_#G{DMUb
zXrwT!k_S#BtF&G7lJir5E+|$=$ydnBFHy)%EJ@B#NGw(;NG$@pG$}PBu`DycNUzFg
zasaEmjzUOkafw1fVsUY5u|iTQPzT6U#SmYWCMTyB1BIb_Cg-uPP-f;};$Y-r;$a4o
dj66(Sj57Z@SU4Ct*f^N@7{Ox9lP%eH0068dU<m*K

diff --git a/ext/auto-inst/test.py b/ext/auto-inst/test.py
index eb0c69191..63ef51fae 100644
--- a/ext/auto-inst/test.py
+++ b/ext/auto-inst/test.py
@@ -21,36 +21,32 @@ def setup_paths(request):
 def test_llvm(setup_paths):
     json_file, repo_dir, output_file = setup_paths
 
-    # Run the parser (similar to old behavior)
     result = run_parser(json_file, repo_dir, output_file=output_file)
 
     if result is None:
-        print("WARNING: No instructions found or an error occurred. (Mimic old script warning)")
+        print("WARNING: No instructions found or an error occurred. ")
         # You could fail here if this was previously considered a hard error
         pytest.fail("No output produced by run_parser.")
 
     # Check output file content
     if not os.path.exists(output_file):
-        print("ERROR: output.txt was not created. (Mimic old script error)")
+        print("ERROR: output.txt was not created.")
         pytest.fail("Output file was not created.")
 
     with open(output_file, 'r') as f:
         content = f.read()
 
-    # Mimic old behavior: print warnings if no instructions found
     if "Total Instructions Found: 0" in content:
-        print("WARNING: No instructions found in output.txt (Mimic old script warning)")
+        print("WARNING: No instructions found in output.txt ")
 
     # Check for encoding differences
     # In the original script, encoding mismatches were printed like:
     # "Encodings do not match. Differences:"
-    # If we find that line, we mimic the old error messages
     if "Encodings do not match. Differences:" in content:
         # Extract differences lines
         lines = content.splitlines()
         diff_lines = [line for line in lines if line.strip().startswith("-")]
-        print("ERROR: Encoding differences found! (Mimic old script error)")
-        pytest.fail("Encodings do not match as per old behavior.")
+        print("ERROR: Encoding differences found!")
+        pytest.fail("Encodings do not match.")
 
-    # If we reach here, we mimic the old success output
-    print("No warnings or errors detected. Test passes but mimics old success behavior.")
+    print("No warnings or errors detected.")

From a4b5c3982697656226115c21823066b9929e5e67 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 09:22:35 +0000
Subject: [PATCH 6/7] Allow only one value

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9422 -> 9483 bytes
 ext/auto-inst/parsing.py                      |  12 ++++++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index 6e1d3e4c34d9c9a2038f2ebd613ceb4a4676afeb..e9334dd9950c43dcaadf78534695e91af9c81202 100644
GIT binary patch
delta 819
zcmX9+-Afcv6rVFQJ5IB^GZu=gZn&#!utgc9VM=E9fgdQ64?dI(>V7!2I@_IbwqTcn
z70Lo3Zbk&5z6ACnGSOpyLG<z1ya)BrOVnFn=ZrJl-}#-dd+s^+ej0l^uG{grCgAtz
z`(p7#@4mjKJ&;kAaLwRyMi5+~D%4>cc7&RcgR0;OthFj^i#gbVTVf4rqAOLQ4v1nM
z3gV1_3^Qs{6@SvR;uPz_ow9NuIW7w#2v!+Ixn#M4WUaX9X(-u?rsE!EfxKiDmjbC|
zR|3hKFY#Ym#)x5hOjJRKMo=ZyPegkGJbyBF0nQAek5L<kaEU5GATBH%>QVAsMc5#6
z&3_kf1L)_U_2Wt(S<ds{`k-=|=m<Y&n93ERWBjEt3^D%AC@MKJ4EWa@Peznk;^V&8
z{1#465?2T!1k(g(5rPJm_wvlivvx9_BFOQ{mTnl}PRkt3@vklY@S1Ch8m#$`6L$a@
z|I>Q0;|E4MO$f5w1S)9QtPU{{ABz9v2;1<d+dcsN<wmj_HvPWjO$qMuXB|(VmnT!x
zP~gj{w34T4W&S+16%89sknZuJbh7zAQ92;IL9j_s;dZ)xgpMA_j?G-=kTvXV4O!L*
zj@)+a6|-QK@@$>LqV}p=UUfrD00q`S33dK4J&WySI=evQqn&fG%inZ<86HIEPmv;C
zAY_4UlTZ!|RFDVqJS%$43u*qI<+5Fhkk}Rqw$9&or6xuJ$8rpHJsgTAYM_U2K|Wx6
zl&G5K^1QWTnpl9|E{d>R6KkbiUJ*<YRCqemlL_xLoI`kT)F%sP6;6cevpw!+CX9kQ
Pp~kglbx4hAARqh(Nuj?k

delta 735
zcmYk4&rcIk5XU>a+of#V-Nsm3N>N%%Vij!!4Hy)w2Z_{Q2_{AnnzoiwE#2nr7L2wr
zQkA6QX*BWT4^AXR63E`Yc_ni2vKLJ_c`(MmK;LXHZt_0wJM-qv&U^D-PVQV*b-!P6
z@o#spP#PTGR-Y-;A~$JF@nIa~Xt+MQ?!g+|6`If#OlUw0IE5C>2~)15aE+ySR8p|r
zMy*mdZLwT6lcL>K(F<9_WVSS4F3sCwMPI_WH=#&8ScL#Q8p7{d1h;&O)$sZOs#e=~
z22PCeWkxl1aF$YoE#&hZ+e<4KI9`yrX#Md=08XP%>ywAca|%=1m^@DMJl3?VoF;h*
zziDZZ@Ias>=g2W)wE{=o@=ekcmKEH9<CCOif}3!a!xpLjE>>JDGNZ_P$$yNH!**x@
zMsO=M19wmjkHQ<A2sdHb`VxKspySENIrxEkqz_iDSCP*Ezwk<Q0MKI5X%VWpbEpl2
zc)R}w6mX+ICg&;Z0`B&I@UD^!6HI&@iw2iSGQxEN66*LV)_cS$Q8IL9vKo1v6D)I;
z)a>s2hF;Cil`BQIg1=)u@D#h_w|L39_+b$6d3*-eaWDS$$taKiDbV)Uv=i8Sn&eKh
zI@h+8W2HrAI<ok%Y_fHV5ZOCEu@!R5tZ39L=6`LYHrdibu3Sw6e@6Sbse%U-Py_{3
z!2>Etf7zzXg+qy8$ocbr)<X&35;BB3&L&a`r(MqRowm^Nm~%zWMNlbh6Sor?ZPt_V
N_?4h%+~ZSt@_+sYt@Ho@

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index bd5fac67c..a29e76cce 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -75,8 +75,16 @@ def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_
         return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]
 
     def parse_location(loc_str):
-        high, low = loc_str.split('-')
-        return int(high), int(low)
+        # Ensure loc_str is a string
+        loc_str = str(loc_str).strip()
+        if '-' in loc_str:
+            high, low = loc_str.split('-')
+            return int(high), int(low)
+        else:
+            # If no dash, treat it as a single bit field
+            val = int(loc_str)
+            return val, val
+
 
     yaml_var_positions = {}
     for var in yaml_vars:

From 66ef4872e713d18e728b3e9e019f47e90700ebf7 Mon Sep 17 00:00:00 2001
From: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
Date: Thu, 19 Dec 2024 17:32:17 +0000
Subject: [PATCH 7/7] Use AsmString instead of name

Signed-off-by: Afonso Oliveira <Afonso.Oliveira@synopsys.com>
---
 .../__pycache__/parsing.cpython-310.pyc       | Bin 9483 -> 9704 bytes
 ext/auto-inst/parsing.py                      |  46 ++++++++++++------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/ext/auto-inst/__pycache__/parsing.cpython-310.pyc b/ext/auto-inst/__pycache__/parsing.cpython-310.pyc
index e9334dd9950c43dcaadf78534695e91af9c81202..75bd77c41bc885711b1bbf6a2aef74f4c400a15d 100644
GIT binary patch
delta 832
zcmZuv&ubGw6rP#gt=nl5L{e;}%A{$b0rA(tLj@r!9;%I)6s6D-rrAxpw!3R)HcDYb
z4n;h891mWU<dl<#(!arfBH&37PyP$_&7>qE4(zw@y?yWP_q{hCHhw+H#n~)Ha1LL3
zt)06kId!Gyfcp?0=>epuj|QlZ2dIlu+$RI{2<@)!kS^(3iIrN3eN1G&O%o@zJnWHU
zYm#rLwk))1YWHUb$g>tv*GZie9T0IdB?MtqLv8BWHB@r=dyEiea2^M}$YY#^JZbC)
zQHuqUOhl)V1aSm5dnMzj?(u}Py(89$!`EEsEZ*Wek(FgwOHAYqKkWGOhG|;$la7ca
zOZNR~;{*SQ1x?nBJCS#tRpQ8JF(ioI+d%r`I#XO0ExB$i&Ck%OM?Cb|8W1b3vn^=)
zH%arYDdLcAmp7kI3a7-^yr7Ys7jD#^RhW^}h@%8F1X1r_&COWAG(i8Pmod&oO{oDL
z77L?f%xW@2E!8lZlK7u;Eu69*s1z+&y|6UEVo;@YLCkCCwuY((TZ6tSBBS9BKxsx4
z;5C|^{6)Chcg!kAl*_OV9ARCf>>ihXT?h2Egua1em_tb2TbjSN0Fg}gqD3*mdCLaO
zI4*H9IkPRHPQ}ts3qLsBW}6r*H7tC`3+mnSE*`2M%Om$A6qm60qP`Ui?od0H_^f^x
zSFgSRHS!Wfa@fTbQ$leLy5_KbCcdg$E0?pTHZK+-M|@ISE7durP2B}DPF9}J7Tx=9
HhC;<3kxt@C

delta 584
zcmY*V&ubGw6yBNL4V$&ui^k%Q!n8>Yfl??G4?PH?2T`hM&m~NEC&_5C8{bYgZQ0O^
z;6d;*#Df=;V^2luAK=~pV8Me2#j95@^-U~-58jXOd*8hGn4gc{JPeXb#kbI2e-FF2
zwmt^>`f(ZIJ@j-RLvGEixiz6Vosoz{bWSIBPIDW1Iyc=5_5#`eafo`QiIY3`tv!F@
z=59_h3pR3>TEyD5Ue+Uf*J?ROpL~XPS1B<Vs$-@S){$|<I2&*k_GH{;FT^2}F?*gS
zaXaFQvrYm?qzc+GABc4pCUGhw0Y)6jPDg-<)nY964T*vbDT%sm24dLfp`n`_O*WRQ
z$C~ZtI^($YvVud?$vDVll(LYgLVvTb3JCB&pn~CX%Ymu^%NPoGG#m;j{E!c&;(d7}
zS|wODo$|kXp=NNkpNz%di`%0ZuT8@QMB$lLJXUEzG99ec)^4KMbC(FEl$1!YaObyI
z_J~L5xkF)k_GIOUO}5T%ukH{s)vwmR5?}Av_Q;9;Tq`!;;eZx_nMozs!31Lrg~Oqn
zLAEHxfmCo}6gGU&U+N9=K>w=WAg8+0SgtH4T!Cxm>3hA|cy?)ON~4EU{kHM6T=njF
JOFnVV{{T2GrN00G

diff --git a/ext/auto-inst/parsing.py b/ext/auto-inst/parsing.py
index a29e76cce..b272e3580 100644
--- a/ext/auto-inst/parsing.py
+++ b/ext/auto-inst/parsing.py
@@ -244,23 +244,37 @@ def get_repo_instructions(repo_directory):
 
 def find_json_key(instr_name, json_data):
     """
-    Attempt to find a matching key in json_data for instr_name, considering different
-    naming conventions: replacing '.' with '_', and trying various case transformations.
+    Find the json_data entry whose AsmString mnemonic (its first whitespace-separated
+    token, compared case-insensitively) equals instr_name; return its key, else None.
+    
+    Args:
+        instr_name (str): The instruction name from YAML
+        json_data (dict): The JSON data containing instruction information
+        
+    Returns:
+        str or None: The matching key from json_data if found, None otherwise
     """
-    lower_name = instr_name.lower()
-    lower_name_underscore = lower_name.replace('.', '_')
-    variants = {
-        lower_name,
-        lower_name_underscore,
-        instr_name.upper(),
-        instr_name.replace('.', '_').upper(),
-        instr_name.capitalize(),
-        instr_name.replace('.', '_').capitalize()
-    }
-
-    for v in variants:
-        if v in json_data:
-            return v
+    # First, normalize the instruction name for comparison
+    instr_name = instr_name.lower().strip()
+    
+    # Search through all entries in json_data
+    for key, value in json_data.items():
+        if not isinstance(value, dict):
+            continue
+            
+        # Get the AsmString value and normalize it
+        asm_string = safe_get(value, 'AsmString', '').lower().strip()
+        if not asm_string:
+            continue
+            
+        # Extract the base instruction name from AsmString
+        # AsmString looks like "add $rd, $rs1, $rs2"; keep only the first
+        # whitespace-separated token, i.e. the mnemonic "add".
+        base_asm_name = asm_string.split()[0]
+        
+        if base_asm_name == instr_name:
+            return key
+            
     return None
 
 def run_parser(json_file, repo_directory, output_file="output.txt"):