diff --git a/README.md b/README.md index b579753..ae3b305 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ -# Change-impact-analysis Tool +# DeltaKernel Change Impact Analysis Tool + ## Table of Contents + - [Introduction](#introduction) - [Innovation](#innovation) - [How to Use](#how-to-use) @@ -12,11 +14,12 @@ ## Introduction -The Change Impact Analysis Tool generates a comprehensive visual report detailing changes in both header files and source code between two Linux versions (tags in the Linux kernel repository: old_tag and new_tag). This tool helps developers view updates from the old version. +DeltaKernel Change Impact Analysis Tool generates a comprehensive visual report detailing changes in both header files and source code between two Linux versions (tags in the Linux kernel repository: old_tag and new_tag). This tool helps developers view updates from the old version. The diff report includes a subset of files from the Linux repository that are included in building the kernel, contributing to a focused and detailed report on the compile-time source code in Linux. ## Innovation + The idea of generating a web display for Linux kernel version change impact analysis is inspired by [Cregit](https://github.com/cregit/cregit). This tool innovates on Cregit by: - Considering the extensive code space the Linux kernel deals with, it provides a compile-time analysis instead of a static analysis of the commit history of the Linux kernel, presenting changes only in files used during compilation. @@ -45,6 +48,13 @@ Execute the tool by specifying the old and new tags: ```bash ./run_tool.sh [-c clone_path] [-u repo_link] [-s subsystem] ``` + +Example Usage: +```bash +./run_tool.sh "v6.8" "v6.9" -c "linux-clone" -u "https://github.com/torvalds/linux" -s "security" +# the tool will generate a web update report on linux kernel v6.9 from v6.8 for the security subsystem. 
+# the linux repository will be cloned into a folder named linux-clone during tool execution. +``` - ``: Specifies the old version tag. - ``: Specifies the new version tag. - `-c `: Optional. Defines the user-specified path to clone the Linux source code repository. @@ -76,7 +86,7 @@ The tool operates through a structured process to generate a comprehensive chang During linux kernel compilation, `Makefile.build` calls `$K/scripts/basic/fixdep.c` to generate a .cmd file for each source that collects dependency information during compilation. -This tool incorporates a modification that applies a patch (`patch.file`) to `scripts/basic/fixdep.c`, enabling it to output dependency information into a **list of header files** when building the kernel. +The `scripts/basic/fixdep.c` file generates a `.cmd` file containing dependency information for each source file that the kernel compiles. This tool includes a modification that applies a patch (fixdep-patch.file) to `fixdep.c`, enabling it to collect dependency files for each source file and output a comprehensive list of all source files and their dependencies for the entire kernel compilation. The resulting `dependency_list.txt` is generated after kernel compilation. #### Source code diff --git a/build_scripts/build_collect_diff.sh b/build_scripts/build_collect_diff.sh index 0e490fb..98c67e3 100755 --- a/build_scripts/build_collect_diff.sh +++ b/build_scripts/build_collect_diff.sh @@ -7,12 +7,6 @@ set -e # check and install gcc-11 if not already installed install_package_safe() { - if ! command -v gcc-11 &> /dev/null; then - sudo apt update - sudo apt install gcc-11 - else - echo "GCC-11 is already installed." - fi if ! 
command -v libssl-dev &> /dev/null; then sudo apt-get update sudo apt-get install -y libssl-dev @@ -145,7 +139,7 @@ echo "the current os-linux version: " cat /etc/os-release echo "start running make" -make HOSTCC=gcc-11 CC=gcc-11 +make echo "finished compile kernel using gcc 11" diff --git a/build_scripts/tokenize.py b/build_scripts/tokenize.py index d9b0966..b065e7a 100755 --- a/build_scripts/tokenize.py +++ b/build_scripts/tokenize.py @@ -15,8 +15,9 @@ import json import argparse from datetime import datetime +import tempfile -CHUNK_HEADER_PATTERN = r'^@@ -\d+,\d+ \+(\d+),\d+ @@' +CHUNK_HEADER_PATTERN = r'^@@ -\d+,\d+ \+(\d+),\d+ @@' COMMIT_REGEX = r'^commit ([0-9a-f]{40})$' AUTHOR_REGEX = r'^Author: (.+)$' DATE_REGEX = r'^Date:\s+(.+)$' @@ -301,8 +302,6 @@ def highlight_substring(commit_list): parser = argparse.ArgumentParser(description='Process some files.') parser.add_argument('git_diff_report', type=str, help='Path to git_diff_report.txt') - parser.add_argument('intermediate_file', type=str, - help='Path to intermediate_file') parser.add_argument('output_file', type=str, help='Path to output_file') parser.add_argument('tag1', type=str, help='old version tag') parser.add_argument('tag2', type=str, help='new verison tag') @@ -310,11 +309,14 @@ def highlight_substring(commit_list): args = parser.parse_args() git_diff_report_path = args.git_diff_report - intermediate_file_path = args.intermediate_file output_file_path = args.output_file TAG1 = args.tag1 TAG2 = args.tag2 + # Create a temporary file and get its path + temp_file = tempfile.NamedTemporaryFile(delete=False) + intermediate_file_path = temp_file.name + # Identify all the (line number in git diff report) added_lines = parse(git_diff_report_path) @@ -329,3 +331,4 @@ def highlight_substring(commit_list): print("starting tokenize function test", flush=True) tokenize(intermediate_file_path, output_file_path) print("Result exported to tokenize.json", flush=True) + os.remove(intermediate_file_path) diff 
--git a/generate_build_filelists.sh b/generate_build_filelists.sh index dfdcfa7..58dc39e 100755 --- a/generate_build_filelists.sh +++ b/generate_build_filelists.sh @@ -107,8 +107,8 @@ display_file_head "$curr_dir/build_data" "filtered_diff_source_replace.txt" 3 # Retrieve and tokenize commit info per added line echo "tokenizing each commit line ..." git checkout "$TAG2" -python3 "$curr_dir"/build_scripts/tokenize.py "$curr_dir/build_data/filtered_diff_header.txt" "$curr_dir/build_data/parse_git_header.json" "$curr_dir/build_data/tokenize_header.json" "$TAG1" "$TAG2" -python3 "$curr_dir"/build_scripts/tokenize.py "$curr_dir/build_data/filtered_diff_source.txt" "$curr_dir/build_data/parse_git_source.json" "$curr_dir/build_data/tokenize_source.json" "$TAG1" "$TAG2" +python3 "$curr_dir"/build_scripts/tokenize.py "$curr_dir/build_data/filtered_diff_header.txt" "$curr_dir/build_data/tokenize_header.json" "$TAG1" "$TAG2" +python3 "$curr_dir"/build_scripts/tokenize.py "$curr_dir/build_data/filtered_diff_source.txt" "$curr_dir/build_data/tokenize_source.json" "$TAG1" "$TAG2" display_file_head "$curr_dir/build_data" "tokenize_source.json" 3 display_file_head "$curr_dir/build_data" "tokenize_header.json" 3 echo "finished tokenization"