LLVM verification #356
base: main
Changes from all commits: d9b50b2, 1a14ba8, afc753e, 29ec737, 6a81b5d, a4b5c39, 66ef487
@@ -0,0 +1,355 @@
import os
import json
import re
import sys
from collections import defaultdict
import yaml

REPO_INSTRUCTIONS = {}
REPO_DIRECTORY = None


def safe_get(data, key, default=""):
    """Safely get a value from a dictionary, return default if not found or error."""
    try:
        if isinstance(data, dict):
            return data.get(key, default)
        return default
    except:
        return default


def load_yaml_encoding(instr_name):
    """
    Given an instruction name (from JSON), find the corresponding YAML file and load its encoding data.
    We'll try to match the instr_name to a YAML file by using REPO_INSTRUCTIONS and transformations.
    """
    candidates = set()
    lower_name = instr_name.lower()
    candidates.add(lower_name)
    candidates.add(lower_name.replace('_', '.'))
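    # Illustrative example (not from this PR): a TableGen record named "C_ADDI"
    # yields the candidates {"c_addi", "c.addi"}, so a file such as c.addi.yaml can match.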

    yaml_file_path = None
    yaml_category = None
    for cand in candidates:
        if cand in REPO_INSTRUCTIONS:
            yaml_category = REPO_INSTRUCTIONS[cand]
            yaml_file_path = os.path.join(REPO_DIRECTORY, yaml_category, cand + ".yaml")
            if os.path.isfile(yaml_file_path):
                break
            else:
                yaml_file_path = None

    if not yaml_file_path or not os.path.isfile(yaml_file_path):
        # YAML not found
        return None, None

    # Load the YAML file
    with open(yaml_file_path, 'r') as yf:
        ydata = yaml.safe_load(yf)

    encoding = safe_get(ydata, 'encoding', {})
    yaml_match = safe_get(encoding, 'match', None)
    yaml_vars = safe_get(encoding, 'variables', [])

    return yaml_match, yaml_vars


def compare_yaml_json_encoding(instr_name, yaml_match, yaml_vars, json_encoding_str):
    """
    Compare the YAML encoding (match + vars) with the JSON encoding (binary format).
    If the JSON has a variable like vm[?], it should be treated as just vm.

    If instr_name starts with 'C_', then treat the instruction as 16 bits long.
    Otherwise, treat it as 32 bits long.

    Return a list of differences.
    """
    if not yaml_match:
        return ["No YAML match field available for comparison."]
    if not json_encoding_str:
        return ["No JSON encoding available for comparison."]

    # Determine expected length based on whether it's a compressed instruction (C_)
    expected_length = 16 if instr_name.startswith('C_') else 32

    yaml_pattern_str = yaml_match.replace('-', '.')
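    # The YAML match string marks variable bits with '-' (illustrative example for a
    # 32-bit R-type pattern: "0000000----------000-----0110011"); converting '-' to '.'
    # lets each variable position act as a wildcard in the bit-by-bit check below.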
    if len(yaml_pattern_str) != expected_length:
        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected {expected_length}. Cannot compare properly."]

    def parse_location(loc_str):
        # Ensure loc_str is a string
        loc_str = str(loc_str).strip()
        if '-' in loc_str:
            high, low = loc_str.split('-')
            return int(high), int(low)
        else:
            # If no dash, treat it as a single bit field
            val = int(loc_str)
            return val, val
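
    # Illustrative: parse_location("31-25") -> (31, 25); parse_location(7) -> (7, 7).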
    yaml_var_positions = {}
    for var in yaml_vars:
        high, low = parse_location(var["location"])
        yaml_var_positions[var["name"]] = (high, low)

    # Tokenize the JSON encoding string. We assume it should match the expected_length in bits.
    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\]|\[\?\])?)', json_encoding_str)
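    # Illustrative: an encoding string such as "0000000rs2[4]rs2[3]..." splits into one
    # token per bit, either a fixed '0'/'1' or a variable reference like 'rs2[4]' or 'vm[?]'.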
    json_bits = []
    bit_index = expected_length - 1
    for t in tokens:
        json_bits.append((bit_index, t))
        bit_index -= 1

    if bit_index != -1:
        return [f"JSON encoding does not appear to be {expected_length} bits. Ends at bit {bit_index+1}."]

    # Normalize JSON bits (handle vm[?] etc.)
    normalized_json_bits = []
    for pos, tt in json_bits:
        if re.match(r'vm\[[^\]]*\]', tt):
            tt = 'vm'
        normalized_json_bits.append((pos, tt))
    json_bits = normalized_json_bits

    differences = []

    # Check fixed bits
    for b in range(expected_length):
        yaml_bit = yaml_pattern_str[expected_length - 1 - b]
        token = [tt for (pos, tt) in json_bits if pos == b]
        if not token:
            differences.append(f"Bit {b}: No corresponding JSON bit found.")
            continue
        json_bit_str = token[0]

        if yaml_bit in ['0', '1']:
            if json_bit_str not in ['0', '1']:
                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
            elif json_bit_str != yaml_bit:
                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
        else:
            if json_bit_str in ['0', '1']:
                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")

    # Check variable fields
    for var_name, (high, low) in yaml_var_positions.items():
        # Ensure the variable range fits within the expected_length
        if high >= expected_length or low < 0:
            differences.append(f"Variable {var_name}: location {high}-{low} is out of range for {expected_length}-bit instruction.")
            continue

        json_var_fields = []
        for bb in range(low, high+1):
            token = [tt for (pos, tt) in json_bits if pos == bb]
            if token:
                json_var_fields.append(token[0])
            else:
                json_var_fields.append('?')

        # Extract field names
        field_names = set(re.findall(r'([A-Za-z0-9]+)(?:\[\d+\]|\[\?\])?', ' '.join(json_var_fields)))
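        # Illustrative: bits collected as ['rs2[4]', 'rs2[3]', 'rs2[2]', 'rs2[1]', 'rs2[0]']
        # reduce to the single field name {'rs2'}, which is the expected (matching) case.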
        if len(field_names) == 0:
            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
        elif len(field_names) > 1:
            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")

    return differences


def safe_print_instruction_details(name: str, data: dict, output_stream):
    """Print formatted instruction details and compare YAML/JSON encodings."""
    try:
        output_stream.write(f"\n{name} Instruction Details\n")
        output_stream.write("=" * 50 + "\n")

        output_stream.write("\nBasic Information:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Name: {name}\n")
        output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n")
        output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n")

        locs = safe_get(data, '!locs', [])
        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
        output_stream.write(f"Location: {loc}\n")

        output_stream.write("\nOperands:\n")
        output_stream.write("-" * 20 + "\n")
        try:
            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Inputs: {in_ops}\n")
        except:
            output_stream.write("Inputs: N/A\n")

        try:
            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Outputs: {out_ops}\n")
        except:
            output_stream.write("Outputs: N/A\n")

        # Encoding
        output_stream.write("\nEncoding Pattern:\n")
        output_stream.write("-" * 20 + "\n")
        encoding_bits = []
        try:
            inst = safe_get(data, 'Inst', [])
            for bit in inst:
                if isinstance(bit, dict):
                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
                else:
                    encoding_bits.append(str(bit))
            # Reverse the bit order before joining
            encoding_bits.reverse()

            encoding = "".join(encoding_bits)
            output_stream.write(f"JSON Encoding: {encoding}\n")
        except:
            output_stream.write("JSON Encoding: Unable to parse encoding\n")
            encoding = ""

        # compare YAML vs JSON encodings
        yaml_match, yaml_vars = load_yaml_encoding(name)
        if yaml_match is not None:
            output_stream.write(f"YAML Encoding: {yaml_match}\n")
        else:
            output_stream.write("YAML Encoding: Not found\n")

        if yaml_match and encoding:
            # Perform comparison
            differences = compare_yaml_json_encoding(name, yaml_match, yaml_vars, encoding)
            if differences and len(differences) > 0:
                output_stream.write("\nEncodings do not match. Differences:\n")
                for d in differences:
                    output_stream.write(f" - {d}\n")
                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
            else:
                output_stream.write("\nEncodings Match: No differences found.\n")
        else:
            # If we have no YAML match or no JSON encoding, we note that we can't compare
            output_stream.write("\nComparison: Cannot compare encodings (missing YAML or JSON encoding).\n")

        output_stream.write("\n")
    except Exception as e:
        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
        output_stream.write("Continuing with next instruction...\n\n")


def get_repo_instructions(repo_directory):
    """
    Recursively find all YAML files in the repository and extract instruction names along with their category.
    """
    repo_instructions = {}
    for root, _, files in os.walk(repo_directory):
        for file in files:
            if file.endswith(".yaml"):
                instr_name = os.path.splitext(file)[0]
                relative_path = os.path.relpath(root, repo_directory)
                repo_instructions[instr_name.lower()] = relative_path
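                # Illustrative (hypothetical layout): <repo>/I/add.yaml maps to {"add": "I"}.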
    return repo_instructions


def find_json_key(instr_name, json_data):
    """
    Find a matching instruction in json_data by comparing against AsmString values.
    Returns the matching key if found, None otherwise.

    Args:
        instr_name (str): The instruction name from YAML
        json_data (dict): The JSON data containing instruction information

    Returns:
        str or None: The matching key from json_data if found, None otherwise
    """
    # First, normalize the instruction name for comparison
    instr_name = instr_name.lower().strip()

    # Search through all entries in json_data
    for key, value in json_data.items():
Review comment: So, let's assume you parsed the whole json object into a python variable called json. You would use code like the following:

    for def_name in json["!instanceof"]["RVInstCommon"]:
        def_data = json[def_name]
        ...

This saves you having to look at all the tablegen data that is not an instruction (so an alias or a pattern or CSR or something). Note you'll still have to look at …
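A minimal sketch of that suggestion (the helper name and the defensive .get() calls are mine, not part of this PR), assuming the TableGen dump exposes the "!instanceof" index described above:

    def iter_instruction_defs(data):
        # Walk only records TableGen lists as RVInstCommon instances, skipping
        # aliases, patterns, CSR records, and other non-instruction defs.
        for def_name in data.get("!instanceof", {}).get("RVInstCommon", []):
            def_data = data.get(def_name)
            if isinstance(def_data, dict):
                yield def_name, def_data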
        if not isinstance(value, dict):
            continue

        # Get the AsmString value and normalize it
        asm_string = safe_get(value, 'AsmString', '').lower().strip()
        if not asm_string:
            continue

        # Extract the base instruction name from AsmString
        # AsmString might be in format like "add $rd, $rs1, $rs2"
        # We want just "add"
        base_asm_name = asm_string.split()[0]

        if base_asm_name == instr_name:
            return key

    return None


def run_parser(json_file, repo_directory, output_file="output.txt"):
    """
    Run the parser logic:
    1. Get instructions from the repo directory.
    2. Parse the JSON file and match instructions.
    3. Generate output.txt with instruction details.
    """
    global REPO_INSTRUCTIONS, REPO_DIRECTORY
    REPO_DIRECTORY = repo_directory

    # Get instructions and categories from the repository structure
    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
    if not REPO_INSTRUCTIONS:
        print("No instructions found in the provided repository directory.")
        return None

    try:
        # Read and parse JSON
        with open(json_file, 'r') as f:
            data = json.loads(f.read())
    except Exception as e:
        print(f"Error reading file: {str(e)}")
        return None

    all_instructions = []

    # For each YAML instruction, try to find it in the JSON data
    for yaml_instr_name, category in REPO_INSTRUCTIONS.items():
        json_key = find_json_key(yaml_instr_name, data)
        if json_key is None:
            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
            continue

        instr_data = data.get(json_key)
        if not isinstance(instr_data, dict):
            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
            continue

        # Add this instruction to our list
        all_instructions.append((json_key, instr_data))

    # Sort all instructions by name
    all_instructions.sort(key=lambda x: x[0].lower())

    with open(output_file, "w") as outfile:
        outfile.write("RISC-V Instruction Summary\n")
        outfile.write("=" * 50 + "\n")
        total = len(all_instructions)
        outfile.write(f"\nTotal Instructions Found: {total}\n")
        for name, _ in all_instructions:
            outfile.write(f" - {name}\n")

        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
        outfile.write("=" * 80 + "\n")

        # Print details for each instruction directly
        for name, instr_data in all_instructions:
            safe_print_instruction_details(name, instr_data, outfile)

    print(f"Output has been written to {output_file}")
    return output_file


def main():
    if len(sys.argv) != 3:
        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
        sys.exit(1)

    json_file = sys.argv[1]
    repo_directory = sys.argv[2]

    result = run_parser(json_file, repo_directory, output_file="output.txt")
    if result is None:
        sys.exit(1)


if __name__ == '__main__':
    main()
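For reference, a typical invocation (the file and directory names here are illustrative, not from this PR) would be:

    python riscv_parser.py RISCV.json <path-to-instruction-yaml-directory>

where the JSON file is a TableGen dump (e.g. produced with llvm-tblgen --dump-json) and the directory holds the per-instruction YAML files laid out by category.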
Review comment: If an instruction doesn't have a Size, it cannot be encoded, so it's likely not of interest to checking the encoding.
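A minimal sketch of that filter, assuming it were added inside run_parser's matching loop (the placement is a suggestion, not part of this PR); the per-record Size field is the same one safe_print_instruction_details already reads:

        # Skip records that cannot be encoded (e.g. pseudo-instructions with no real Size).
        size = safe_get(instr_data, 'Size', 0)
        if not size or int(size) == 0:
            continue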