-
Notifications
You must be signed in to change notification settings - Fork 18
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add simple Docker environment variable
Signed-off-by: Afonso Oliveira <[email protected]>
- Loading branch information
1 parent
db49639
commit d9b50b2
Showing
1 changed file
with
328 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
import os | ||
import json | ||
import re | ||
import sys | ||
from collections import defaultdict | ||
import yaml | ||
|
||
# Map of lower-cased instruction name -> category (top-level directory of its
# YAML file); populated by main() via get_repo_instructions().
REPO_INSTRUCTIONS = {}
# Root directory of the YAML instruction repository (second CLI argument);
# set in main() before any lookups happen.
REPO_DIRECTORY = None
|
||
def safe_get(data, key, default=""):
    """Return ``data[key]`` if *data* is a dict and the key exists, else *default*.

    Args:
        data: Candidate mapping; any non-dict value yields ``default``.
        key: Key to look up.
        default: Fallback value (empty string by default, matching callers
            that format the result straight into report text).

    Returns:
        The mapped value, or ``default`` when *data* is not a dict or the
        key is absent.
    """
    # dict.get cannot raise for hashable keys, so no try/except is needed;
    # the original bare `except:` also swallowed KeyboardInterrupt/SystemExit.
    if isinstance(data, dict):
        return data.get(key, default)
    return default
|
||
def load_yaml_encoding(instr_name):
    """Locate the YAML spec for *instr_name* and return its encoding data.

    The JSON instruction name is normalised (lower-cased, and with
    underscores mapped back to dots) and each variant is looked up in
    REPO_INSTRUCTIONS to derive a candidate YAML path under REPO_DIRECTORY.

    Returns:
        ``(match, variables)`` from the YAML ``encoding`` section, or
        ``(None, None)`` when no matching YAML file exists.
    """
    base = instr_name.lower()
    names_to_try = {base, base.replace('_', '.')}

    chosen_path = None
    for candidate in names_to_try:
        category = REPO_INSTRUCTIONS.get(candidate)
        if category is None:
            continue
        path = os.path.join(REPO_DIRECTORY, category, candidate + ".yaml")
        if os.path.isfile(path):
            chosen_path = path
            break

    if chosen_path is None:
        # YAML not found for any naming variant.
        return None, None

    with open(chosen_path, 'r') as handle:
        spec = yaml.safe_load(handle)

    enc = safe_get(spec, 'encoding', {})
    return safe_get(enc, 'match', None), safe_get(enc, 'variables', [])
|
||
def compare_yaml_json_encoding(yaml_match, yaml_vars, json_encoding_str):
    """Compare a YAML encoding spec against a JSON bit-pattern string.

    Args:
        yaml_match: 32-character YAML ``match`` pattern, MSB first; '0'/'1'
            are fixed bits and '-' marks variable bits.
        yaml_vars: List of ``{"name", "location"}`` dicts where location is
            "high-low" bit indices (inclusive, LSB = 0).
        json_encoding_str: Concatenated JSON encoding, MSB first, made of
            '0'/'1' literals and variable references like "rs1[4]".

    Returns:
        A list of human-readable difference strings (empty when the
        encodings agree).
    """
    if not yaml_match:
        return ["No YAML match field available for comparison."]
    if not json_encoding_str:
        return ["No JSON encoding available for comparison."]

    # Normalise: YAML uses '-' for variable bits; use '.' internally.
    yaml_pattern_str = yaml_match.replace('-', '.')
    if len(yaml_pattern_str) != 32:
        return [f"YAML match pattern length is {len(yaml_pattern_str)}, expected 32. Cannot compare properly."]

    def parse_location(loc_str):
        # "high-low" -> (high, low) integer bit indices.
        high, low = loc_str.split('-')
        return int(high), int(low)

    yaml_var_positions = {}
    for var in yaml_vars:
        high, low = parse_location(var["location"])
        yaml_var_positions[var["name"]] = (high, low)

    # Tokenize JSON encoding: single fixed bits or "name[idx]" references.
    tokens = re.findall(r'(?:[01]|[A-Za-z0-9]+(?:\[\d+\])?)', json_encoding_str)

    # Map bit position -> token (first token is bit 31). A dict gives O(1)
    # lookups below instead of re-scanning a list for every bit (O(n^2)).
    json_bits = {}
    bit_index = 31
    for t in tokens:
        json_bits[bit_index] = t
        bit_index -= 1

    if bit_index != -1:
        return [f"JSON encoding does not appear to be 32 bits. Ends at bit {bit_index+1}."]

    differences = []

    # Check fixed bits
    for b in range(32):
        yaml_bit = yaml_pattern_str[31 - b]  # pattern string is MSB-first
        json_bit_str = json_bits.get(b)
        if json_bit_str is None:
            differences.append(f"Bit {b}: No corresponding JSON bit found.")
            continue

        if yaml_bit in ['0', '1']:
            if json_bit_str not in ['0', '1']:
                differences.append(f"Bit {b}: YAML expects fixed bit '{yaml_bit}' but JSON has '{json_bit_str}'")
            elif json_bit_str != yaml_bit:
                differences.append(f"Bit {b}: YAML expects '{yaml_bit}' but JSON has '{json_bit_str}'")
        else:
            # Variable bit in YAML must not be a fixed bit in JSON.
            if json_bit_str in ['0', '1']:
                differences.append(f"Bit {b}: YAML variable bit but JSON is fixed '{json_bit_str}'")

    # Check that each YAML variable maps onto exactly one JSON field name.
    for var_name, (high, low) in yaml_var_positions.items():
        json_var_fields = [json_bits.get(bb, '?') for bb in range(low, high + 1)]
        field_names = set(re.findall(r'([A-Za-z0-9]+)\[\d+\]', ' '.join(json_var_fields)))
        if len(field_names) == 0:
            differences.append(f"Variable {var_name}: No corresponding field found in JSON bits {high}-{low}")
        elif len(field_names) > 1:
            differences.append(f"Variable {var_name}: Multiple fields {field_names} found in JSON for bits {high}-{low}")

    return differences
|
||
def safe_print_instruction_details(name: str, data: dict, output_stream):
    """Write a formatted report for one instruction and diff its encodings.

    Renders basic info, operands, properties, scheduling, and the JSON
    ("Inst") encoding, then compares that encoding against the YAML spec
    located via load_yaml_encoding(). Differences are written to the report
    and echoed to stdout. Errors are reported into the stream so that one
    bad instruction never aborts the whole run.

    Args:
        name: Instruction key as it appears in the tablegen JSON.
        data: The instruction's JSON record.
        output_stream: Writable text stream receiving the report.
    """
    try:
        # Print the instruction details without separating by category
        output_stream.write(f"\n{name} Instruction Details\n")
        output_stream.write("=" * 50 + "\n")

        # Basic Information
        output_stream.write("\nBasic Information:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Name: {name}\n")
        output_stream.write(f"Assembly Format: {safe_get(data, 'AsmString', 'N/A')}\n")
        output_stream.write(f"Size: {safe_get(data, 'Size', 'N/A')} bytes\n")

        # Location
        locs = safe_get(data, '!locs', [])
        loc = locs[0] if isinstance(locs, list) and len(locs) > 0 else "N/A"
        output_stream.write(f"Location: {loc}\n")

        # Operands. `except Exception` (not bare `except`) so that
        # KeyboardInterrupt/SystemExit still propagate.
        output_stream.write("\nOperands:\n")
        output_stream.write("-" * 20 + "\n")
        try:
            in_ops = safe_get(data, 'InOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Inputs: {in_ops}\n")
        except Exception:
            output_stream.write("Inputs: N/A\n")

        try:
            out_ops = safe_get(data, 'OutOperandList', {}).get('printable', 'N/A')
            output_stream.write(f"Outputs: {out_ops}\n")
        except Exception:
            output_stream.write("Outputs: N/A\n")

        # Instruction Properties
        output_stream.write("\nInstruction Properties:\n")
        output_stream.write("-" * 20 + "\n")
        output_stream.write(f"Commutable: {'Yes' if safe_get(data, 'isCommutable', 0) else 'No'}\n")
        output_stream.write(f"Memory Load: {'Yes' if safe_get(data, 'mayLoad', 0) else 'No'}\n")
        output_stream.write(f"Memory Store: {'Yes' if safe_get(data, 'mayStore', 0) else 'No'}\n")
        output_stream.write(f"Side Effects: {'Yes' if safe_get(data, 'hasSideEffects', 0) else 'No'}\n")

        # Scheduling Info
        sched = safe_get(data, 'SchedRW', [])
        if sched:
            output_stream.write("\nScheduling Information:\n")
            output_stream.write("-" * 20 + "\n")
            output_stream.write("Operations:\n")
            try:
                for op in sched:
                    if isinstance(op, dict):
                        output_stream.write(f"  - {op.get('printable', 'N/A')}\n")
            except Exception:
                output_stream.write("  - Unable to parse scheduling information\n")

        # Encoding: rebuild the 32-bit pattern from the JSON "Inst" list,
        # which stores bit 0 first; reverse so the string reads MSB-first.
        output_stream.write("\nEncoding Pattern:\n")
        output_stream.write("-" * 20 + "\n")
        encoding_bits = []
        try:
            inst = safe_get(data, 'Inst', [])
            for bit in inst:
                if isinstance(bit, dict):
                    encoding_bits.append(f"{bit.get('var', '?')}[{bit.get('index', '?')}]")
                else:
                    encoding_bits.append(str(bit))
            # Reverse the bit order before joining
            encoding_bits.reverse()
            encoding = "".join(encoding_bits)
            output_stream.write(f"Binary Format: {encoding}\n")
        except Exception:
            output_stream.write("Binary Format: Unable to parse encoding\n")
            encoding = ""

        # Now compare YAML vs JSON encodings
        yaml_match, yaml_vars = load_yaml_encoding(name)
        if yaml_match is not None and encoding:
            differences = compare_yaml_json_encoding(yaml_match, yaml_vars, encoding)
            if differences:
                output_stream.write("\nDifferences in encoding:\n")
                for d in differences:
                    output_stream.write(f"  - {d}\n")
                    print(f"Difference in {name}: {d}", file=sys.stdout)  # Print to console
            else:
                output_stream.write("\nNo encoding differences found.\n")
        else:
            # If we have no YAML match or no encoding, we note that we can't compare
            if yaml_match is None:
                output_stream.write("\nNo YAML encoding match found for comparison.\n")
            if not encoding:
                output_stream.write("\nNo JSON encoding found for comparison.\n")

        output_stream.write("\n")
    except Exception as e:
        output_stream.write(f"Error processing instruction {name}: {str(e)}\n")
        output_stream.write("Continuing with next instruction...\n\n")
|
||
def get_repo_instructions(repo_directory):
    """Walk *repo_directory* and map every YAML instruction file to its category.

    The category is the first path component below the repository root;
    files sitting directly in the root fall into "Other".

    Returns:
        dict mapping lower-cased instruction name (file stem) -> category.
    """
    instructions = {}
    for dirpath, _dirnames, filenames in os.walk(repo_directory):
        relative = os.path.relpath(dirpath, repo_directory)
        top_level = "Other" if relative == '.' else relative.split(os.sep)[0]

        for filename in filenames:
            if not filename.endswith(".yaml"):
                continue
            # Store lowercase key for easy lookup
            stem = os.path.splitext(filename)[0]
            instructions[stem.lower()] = top_level
    return instructions
|
||
def find_json_key(instr_name, json_data):
    """Find the key in *json_data* matching *instr_name* across naming styles.

    YAML uses lower-case dotted names (e.g. "fadd.s") while tablegen JSON
    typically uses upper-case underscored keys ("FADD_S"). Candidates are
    tried in a deterministic preference order — verbatim name first, then
    case/dot transformations — instead of iterating an unordered set, so
    results are reproducible when several variants exist.

    Args:
        instr_name: Instruction name as derived from the YAML file.
        json_data: Mapping of tablegen JSON keys to records.

    Returns:
        The matching JSON key, or None when no variant is present.
    """
    underscored = instr_name.replace('.', '_')
    candidates = [
        instr_name,            # verbatim (previously never tried)
        underscored,
        instr_name.lower(),
        underscored.lower(),
        instr_name.upper(),
        underscored.upper(),
        instr_name.capitalize(),
        underscored.capitalize(),
    ]

    seen = set()
    for candidate in candidates:
        if candidate in seen:
            continue
        seen.add(candidate)
        if candidate in json_data:
            return candidate
    return None
|
||
def main():
    """Entry point: cross-check YAML instruction specs against tablegen JSON.

    Usage: ``python riscv_parser.py <tablegen_json_file> <arch_inst_directory>``

    Writes the full report to output.txt, echoes encoding differences to the
    console, and logs unmatched YAML instructions to stderr.
    """
    global REPO_INSTRUCTIONS, REPO_DIRECTORY

    if len(sys.argv) != 3:
        print("Usage: python riscv_parser.py <tablegen_json_file> <arch_inst_directory>")
        sys.exit(1)

    json_file, REPO_DIRECTORY = sys.argv[1], sys.argv[2]

    # Discover every YAML-described instruction (name -> category).
    REPO_INSTRUCTIONS = get_repo_instructions(REPO_DIRECTORY)
    if not REPO_INSTRUCTIONS:
        print("No instructions found in the provided repository directory.")
        sys.exit(1)

    try:
        # Read and parse the tablegen JSON dump.
        with open(json_file, 'r') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error reading file: {str(e)}")
        sys.exit(1)

    # Pair each YAML instruction with its JSON record, skipping misses.
    matched = []
    for yaml_instr_name in REPO_INSTRUCTIONS:
        key = find_json_key(yaml_instr_name, data)
        if key is None:
            print(f"DEBUG: Instruction '{yaml_instr_name}' (from YAML) not found in JSON, skipping...", file=sys.stderr)
            continue

        record = data.get(key)
        if not isinstance(record, dict):
            print(f"DEBUG: Instruction '{yaml_instr_name}' is in JSON but not a valid dict, skipping...", file=sys.stderr)
            continue

        matched.append((key, record))

    # Sort all instructions by name (case-insensitive).
    matched.sort(key=lambda pair: pair[0].lower())

    with open("output.txt", "w") as outfile:
        outfile.write("RISC-V Instruction Summary\n")
        outfile.write("=" * 50 + "\n")
        outfile.write(f"\nTotal Instructions Found: {len(matched)}\n")
        for key, _ in matched:
            outfile.write(f" - {key}\n")

        outfile.write("\nDETAILED INSTRUCTION INFORMATION\n")
        outfile.write("=" * 80 + "\n")

        # Print details for each instruction directly, no category splitting
        for key, record in matched:
            safe_print_instruction_details(key, record, outfile)

    print("Output has been written to output.txt")


if __name__ == '__main__':
    main()