njit-jerse · kelloggm · Aug 9, 2024 · Aug 7, 2024 · Aug 7, 2024 · Aug 7, 2024
diff --git a/.github/workflows/specimin_evaluation_CI.yml b/.github/workflows/specimin_evaluation_CI.yml
@@ -0,0 +1,132 @@
+# Runs ASHE (in dryrun mode) against the repositories listed in
+# CI_repository_list.csv and fails the build when the resulting Specimin
+# accuracy differs from the value recorded in CI_Latest_run_percentage.txt.
+name: specimin_evaluation_CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  specimin-evaluation:
+    runs-on: ubuntu-latest
+
+    steps:
+      # On push events there is no pull_request payload, so the ref expression
+      # evaluates to an empty string and checkout uses the triggering commit.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          fetch-depth: 0
+
+      - name: Set up environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.8'
+
+      # 'temurin' is the maintained successor of the deprecated 'adopt' distribution.
+      - name: Set up Java
+        uses: actions/setup-java@v4
+        with:
+          java-version: '21'
+          distribution: 'temurin'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          sudo apt-get update
+          sudo apt-get install -y jq curl bc
+
+      # GITHUB_WORKSPACE is the checkout directory, so this keeps working when
+      # the repository (or a fork of it) is not named "specimin".
+      - name: Display CSV File Contents
+        run: |
+          CSV_PATH="$GITHUB_WORKSPACE/CI_repository_list.csv"
+          if [ -f "$CSV_PATH" ]; then
+            cat "$CSV_PATH"
+          else
+            echo "File $CSV_PATH does not exist"
+            exit 1
+          fi
+
+      - name: Download git-clone-related and dependencies
+        run: |
+          curl -L -o git-clone-related https://raw.githubusercontent.com/plume-lib/git-scripts/main/git-clone-related
+          curl -L -o git-find-fork https://raw.githubusercontent.com/plume-lib/git-scripts/main/git-find-fork
+          curl -L -o git-find-branch https://raw.githubusercontent.com/plume-lib/git-scripts/main/git-find-branch
+          chmod +x git-clone-related git-find-fork git-find-branch
+
+      - name: Clone ASHE Project using git-clone-related
+        run: |
+          ./git-clone-related njit-jerse ASHE_Automated-Software-Hardening-for-Entrypoints ASHE
+
+      - name: Check and Rename Properties File
+        run: |
+          set -ex
+          CONFIG_PATH="ASHE/src/main/resources/config.properties"
+          EXAMPLE_PATH="ASHE/src/main/resources/example.properties"
+
+          if [ -f "$CONFIG_PATH" ]; then
+            echo "config.properties already exists"
+          elif [ -f "$EXAMPLE_PATH" ]; then
+            echo "example.properties found, renaming to config.properties"
+            mv "$EXAMPLE_PATH" "$CONFIG_PATH"
+            if [ -f "$CONFIG_PATH" ]; then
+              echo "config.properties created successfully"
+            else
+              echo "Failed to create config.properties"
+              exit 1
+            fi
+          else
+            echo "Neither config.properties nor example.properties found"
+            exit 1
+          fi
+
+          chmod +w "$CONFIG_PATH"
+          ls -l "$CONFIG_PATH"
+
+      # The whole sed expression is double-quoted so a workspace path that
+      # contains whitespace is not split into several arguments.
+      - name: Update ASHE Config File to update SPECIMIN path
+        run: |
+          set -ex
+          sed -i "s|^specimin.tool.path=.*|specimin.tool.path=$(pwd)|" ASHE/src/main/resources/config.properties
+
+      - name: Make all scripts under ashe_scripts executable
+        run: |
+          set -ex
+          chmod +x ashe_scripts/*.py
+
+      - name: Run the script
+        run: |
+          set -ex
+          python3 ashe_scripts/run_ashe_for_stats.py \
+            "$(pwd)/ASHE" \
+            "$(pwd)/CI_repository_list.csv" \
+            "$(pwd)/ASHE/CI_REPO_CLONE_SPACE" \
+            "$(pwd)/ASHE/src/main/resources/config.properties"
+
+      # Step outputs are written to the GITHUB_OUTPUT file; the older
+      # "::set-output" workflow command is deprecated.
+      - name: Parse accuracy percentage
+        id: parse_accuracy_percentage
+        run: |
+          current_accuracy=$(grep 'Fully successful from minimization to compilation' "$(pwd)/ASHE/logs/specimin_statistics.txt" | awk '{print $NF}' | tr -d '()%')
+          echo "Current accuracy: $current_accuracy"
+          if [ -z "$current_accuracy" ]; then
+            echo "Could not parse the accuracy from specimin_statistics.txt"
+            exit 1
+          fi
+          echo "current_accuracy=$current_accuracy" >> "$GITHUB_OUTPUT"
+
+      - name: Read latest run percentage from file
+        id: read_latest_run_percentage
+        run: |
+          if [ -f "$(pwd)/CI_Latest_run_percentage.txt" ]; then
+            latest_run_accuracy=$(tr -d '()%' < "$(pwd)/CI_Latest_run_percentage.txt")
+            echo "Latest run accuracy: $latest_run_accuracy"
+            echo "latest_run_accuracy=$latest_run_accuracy" >> "$GITHUB_OUTPUT"
+          else
+            echo "File CI_Latest_run_percentage.txt does not exist"
+            exit 1
+          fi
+
+      # The step outputs are passed through env so their text is never
+      # spliced into the shell script itself.
+      - name: Validate accuracy
+        id: validate_accuracy
+        env:
+          CURRENT_ACCURACY: ${{ steps.parse_accuracy_percentage.outputs.current_accuracy }}
+          LATEST_RUN_ACCURACY: ${{ steps.read_latest_run_percentage.outputs.latest_run_accuracy }}
+        run: |
+          if [ "$CURRENT_ACCURACY" != "$LATEST_RUN_ACCURACY" ]; then
+            echo "Current accuracy ($CURRENT_ACCURACY) does not match latest run accuracy ($LATEST_RUN_ACCURACY)."
+            exit 1
+          else
+            echo "Accuracy validation passed."
+          fi
diff --git a/CI_Latest_run_percentage.txt b/CI_Latest_run_percentage.txt
@@ -0,0 +1 @@
+70
diff --git a/CI_repository_list.csv b/CI_repository_list.csv
@@ -0,0 +1,2 @@
+Repository,Branch
+https://github.com/NiharikaJamble/plume-util,master
diff --git a/ashe_scripts/README.ME b/ashe_scripts/README.ME
@@ -0,0 +1,10 @@
+# Specimin Statistics and Exception Ranking
+
+### specimin_statistics.py
+The script to parse the ASHE log files and generate statistical data from Specimin's minimization process.
+
+### specimin_exception_rank.py
+The script to parse the ASHE log files and generate a ranking of the exceptions that occurred during the minimization process.
+
+### run_ashe_for_stats.py
+The script that clones ASHE, builds and runs it, and then runs the specimin_statistics.py and specimin_exception_rank.py scripts.
diff --git a/ashe_scripts/run_ashe_for_stats.py b/ashe_scripts/run_ashe_for_stats.py
@@ -0,0 +1,124 @@
+"""
+Script to run Ashe.RepositoryAutomationEngine and Specimin scripts to analyze the log file generated by ASHE in dryrun mode.
+https://github.com/jonathan-m-phillips/ASHE_Automated-Software-Hardening-for-Entrypoints
+
+Created by: Jonathan Phillips, https://github.com/jonathan-m-phillips
+Date: April 13, 2024
+
+Usage:
+python3 run_ashe_for_stats.py <path_to_clone_ashe> <path_to_csv> <path_to_clone_csv_repositories> <path_to_config.properties>
+"""
+import subprocess
+import sys
+import threading
+import datetime
+import time
+import os
+
+
+def run(ashe_path: str, csv_path: str, clone_path: str, props_file_path: str):
+    """
+    Run ASHE and then the Specimin scripts that analyze the log file it produces.
+
+    Args:
+        ashe_path: absolute path where the ASHE repository is (or will be) cloned
+        csv_path: absolute path to the CSV file containing the repositories ASHE will iterate over
+        clone_path: absolute path to clone the repositories in the CSV file ASHE will iterate over
+        props_file_path: path forwarded to ASHE as -PpropsFilePath (per the usage text,
+            the path to config.properties)
+    """
+    # Resolve the sibling scripts first. __file__ may be relative, and
+    # os.path.abspath() resolves it against the working directory at call time,
+    # so this has to happen before any step that might change that directory.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    main_project_dir = os.path.abspath(os.path.join(script_dir, '..', '..'))
+    stats_script = os.path.join(script_dir, 'specimin_statistics.py')
+    rank_script = os.path.join(script_dir, 'specimin_exception_rank.py')
+    print(f"Current directory path: {script_dir}")
+    print(f"main project path: {main_project_dir}")
+    print(f"Statistics script path: {stats_script}")
+    print(f"Exception rank script path: {rank_script}")
+
+    ashe_url: str = "https://github.com/jonathan-m-phillips/ASHE_Automated-Software-Hardening-for-Entrypoints"
+    # clone or update repository
+    __git_clone_or_update(ashe_url, ashe_path)
+
+    # Background daemon thread that periodically reports the elapsed runtime;
+    # being a daemon, it does not keep the process alive once run() returns.
+    start_time: datetime.datetime = datetime.datetime.now()
+    status_thread: threading.Thread = threading.Thread(target=__print_ashe_runtime, args=(start_time,))
+    status_thread.daemon = True
+    status_thread.start()
+
+    __build_and_run_ashe(csv_path, clone_path, props_file_path, working_dir=ashe_path)
+
+    # run Specimin scripts; paths are quoted because the commands go through a shell
+    log_path: str = os.path.join(ashe_path, "logs", "app.log")
+    print("Running statistics script...")
+    __run_command(f"python3 \"{stats_script}\" \"{log_path}\"")
+
+    print("Running exception rank script...")
+    __run_command(f"python3 \"{rank_script}\" \"{log_path}\"")
+
+
+def __run_command(command, working_dir=None):
+    """
+    Run a shell command and print its captured output.
+
+    A non-zero exit status is reported but not raised, so callers continue
+    with their next step.
+
+    Args:
+        command: the command line, executed through the shell
+        working_dir: directory to run the command in; None keeps the current one
+    """
+    try:
+        result = subprocess.run(command, cwd=working_dir, shell=True, check=True, stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE)
+        # errors="replace" keeps undecodable bytes from raising UnicodeDecodeError
+        print(result.stdout.decode(errors="replace"))
+    except subprocess.CalledProcessError as e:
+        print("Error executing command:", e.stderr.decode(errors="replace"))
+        # Also show what the command wrote to stdout before it failed;
+        # otherwise that part of the output is lost.
+        if e.stdout:
+            print(e.stdout.decode(errors="replace"))
+
+
+def __git_clone_or_update(repo_url, ashe_path):
+    """
+    Clone the repository into ashe_path, or pull if a clone is already there.
+
+    Args:
+        repo_url: URL of the git repository to clone
+        ashe_path: target directory of the clone
+    """
+    if not os.path.exists(ashe_path):
+        print("Cloning the repository...")
+        __run_command(f"git clone {repo_url} {ashe_path}")
+    else:
+        print("Repository exists. Checking if it's a Git repository...")
+        if not os.path.exists(os.path.join(ashe_path, '.git')):
+            print(f"The directory {ashe_path} is not a Git repository.")
+            __run_command(f"git clone {repo_url} {ashe_path}")
+        else:
+            print("Updating the repository...")
+            # Run the pull inside the clone without os.chdir(), so the working
+            # directory of the whole process is left untouched.
+            __run_command("git pull", working_dir=ashe_path)
+
+
+def __build_and_run_ashe(csv_path: str, clone_path: str, props_file_path: str, working_dir: str):
+    """
+    Build ASHE with the Gradle wrapper, then launch its repository automation task.
+
+    Args:
+        csv_path: value passed as -PrepositoriesCsvPath
+        clone_path: value passed as -PcloneDirectory
+        props_file_path: value passed as -PpropsFilePath
+        working_dir: directory both Gradle commands are executed in
+    """
+    llm_mode: str = "dryrun"
+    # Each property value is wrapped in double quotes for the shell.
+    automation_parts = [
+        "./gradlew runRepositoryAutomation",
+        f"-PrepositoriesCsvPath=\"{csv_path}\"",
+        f"-PcloneDirectory=\"{clone_path}\"",
+        f"-Pllm=\"{llm_mode}\"",
+        f"-PpropsFilePath=\"{props_file_path}\"",
+    ]
+
+    print("Building ASHE...")
+    __run_command('./gradlew build', working_dir=working_dir)
+
+    print("Running ASHE...")
+    __run_command(" ".join(automation_parts), working_dir=working_dir)
+
+
+def __print_ashe_runtime(start_time):
+    """
+    Report how long ASHE has been running, once every five minutes, forever.
+
+    Args:
+        start_time: the datetime the elapsed time is measured from
+    """
+    report_interval_seconds = 300
+    print("ASHE started.")
+    print("ASHE runtime: 00:00:00")
+    while True:
+        time.sleep(report_interval_seconds)
+        elapsed = datetime.datetime.now() - start_time
+        # str(timedelta) looks like H:MM:SS.ffffff; keep only the part before the dot
+        whole_seconds, _, _ = str(elapsed).partition('.')
+        print(f"ASHE runtime: {whole_seconds}")
+
+
+if __name__ == "__main__":
+    # Four positional arguments are required, so argv must hold five entries
+    # (the script name plus the four paths); otherwise sys.argv[4] would fail.
+    if len(sys.argv) < 5:
+        print("Usage: python3 run_ashe_for_stats.py <path_to_clone_ashe> <path_to_csv> <path_to_clone_csv_repositories> <path_to_config.properties>")
+        sys.exit(1)
+    run(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
diff --git a/ashe_scripts/specimin_exception_rank.py b/ashe_scripts/specimin_exception_rank.py
@@ -0,0 +1,125 @@
+"""
+Script for analyzing log files generated by ASHE in dryrun mode.
+https://github.com/jonathan-m-phillips/ASHE_Automated-Software-Hardening-for-Entrypoints
+
+Created by: Jonathan Phillips, https://github.com/jonathan-m-phillips
+Date: April 13, 2024
+
+Description:
+This script reads a log file and ranks the exceptions by how frequently they occur. If the exceptions
+occur more often, they are ranked higher. These exception rankings come from running the
+Ashe.RepositoryAutomationEngine in dryrun mode.
+
+Output:
+Rankings written to a txt file in the same directory as the provided log file.
+
+Usage:
+python3 specimin_exception_rank.py <path_to_log_file.log>
+"""
+
+import sys
+import os
+import re
+from collections import defaultdict
+
+
+def analyze_log(file_path: str):
+    """
+    Rank the exceptions found in a log file and write the ranking next to it.
+
+    Args:
+        file_path: path to the log file; the report is written as
+            specimin_exception_rank.txt in the same directory
+    """
+    with open(file_path, 'r') as log_file:
+        log_lines = log_file.readlines()
+
+    ranking = __rank_exceptions(__extract_exceptions(log_lines))
+
+    report_path = os.path.join(os.path.dirname(file_path), 'specimin_exception_rank.txt')
+    __write_ranked_exceptions(ranking, report_path)
+    print("Write successful")
+
+
+def __extract_exceptions(log_lines):
+    """
+    Collect every exception reported in the log. A line counts as an exception
+    when it starts with "Exception in thread".
+    Args:
+        log_lines: A list of log lines
+
+    Returns: A list of [name, message, example_line] entries, in log order
+    """
+    header_re = re.compile(r'^Exception in thread ".*?" (\w+.*?):(.*?)(?=\n\S|\Z)', re.DOTALL)
+    frame_re = re.compile(r'^\s+at (.+)$', re.MULTILINE)
+    line_count = len(log_lines)
+    found = []
+    for index in range(line_count):
+        header = header_re.search(log_lines[index])
+        if header is None:
+            continue
+        name, text = header.groups()
+        # only the line directly after the header is inspected for an "at ..." frame
+        following = log_lines[index + 1] if index + 1 < line_count else ""
+        frame = frame_re.search(following)
+        if frame:
+            location = frame.group(1).strip()
+        else:
+            location = "No code context available"
+        found.append([name.strip(), text.strip(), location])
+    return found
+
+
+def __rank_exceptions(exceptions):
+    """
+    Group identical exceptions and order the groups from most to least frequent.
+    Args:
+        exceptions: A list of (name, message, example_line) entries
+
+    Returns: A list of tuples (count, examples, name, message), highest count first
+    """
+    buckets = defaultdict(list)
+    for entry in exceptions:
+        name, message, example = entry
+        # entries are grouped by name plus the simplified message
+        buckets[(name, simplify_message(message))].append(example)
+
+    rows = [(len(examples), examples, key[0], key[1]) for key, examples in buckets.items()]
+    # stable sort on the count only: groups with equal counts keep first-seen order
+    rows.sort(key=lambda row: row[0], reverse=True)
+    return rows
+
+
+def simplify_message(message):
+    """
+    Strip details from an exception message that vary between occurrences of
+    the same problem, so that equivalent messages compare equal.
+    Args:
+        message: The exception message for Specimin developers to analyze
+
+    Returns: The message with those details removed and surrounding whitespace stripped
+    """
+    # (pattern, flags) pairs, applied in this order
+    noise_patterns = (
+        (r'\bat [\w\.$<>]+\(.*?\)', 0),
+        (r'\bLine \d+\b', 0),
+        (r'\bmemory address 0x[\da-f]+\b', re.I),
+    )
+    for pattern, flags in noise_patterns:
+        message = re.sub(pattern, '', message, flags=flags)
+    return message.strip()
+
+
+def __write_ranked_exceptions(ranked_exceptions, output_file_path):
+    """
+    Write one report entry per ranked exception group.
+
+    Groups with the same count share a rank, and the rank after a tie skips
+    ahead by the size of the tie.
+
+    Args:
+        ranked_exceptions: tuples (count, examples, name, message), highest count first
+        output_file_path: path of the report file; it is overwritten
+    """
+    rank = 1
+    pending = 0  # number of entries already written with the current rank
+    previous_count = None
+
+    with open(output_file_path, 'w') as report:
+        for count, examples, name, message in ranked_exceptions:
+            if count == previous_count:
+                pending += 1  # another member of the current tie group
+            else:
+                rank += pending  # skip the ranks consumed by the previous group
+                pending = 1
+            previous_count = count
+
+            report.write(f"""
+Rank: {rank},
+Count: {count},
+Exception: {name},
+Message: {message},
+Example: {examples[0]}
+
+""")
+
+
+if __name__ == '__main__':
+    # exactly one argument is needed: the path to the log file
+    if len(sys.argv) >= 2:
+        analyze_log(sys.argv[1])
+    else:
+        print("Usage: python3 specimin_exception_rank.py <path_to_log_file.log>")
+        sys.exit(1)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Repository,Branch
		https://github.com/NiharikaJamble/plume-util,master