diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index e292d49..c6725cf 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -25,9 +25,9 @@ jobs: run: | pipenv install --deploy --dev - - name: Pylint on esbmc_ai_lib + - name: Pylint on esbmc_ai run: | - pipenv run pylint esbmc_ai_lib + pipenv run pylint esbmc_ai test: name: PyTest diff --git a/.vscode/launch.json b/.vscode/launch.json index 2b76217..99814f0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -8,7 +8,7 @@ "name": "Launch ESBMC-AI on Open File", "type": "python", "request": "launch", - "module": "esbmc_ai_lib", + "module": "esbmc_ai", "justMyCode": true, "cwd": "${workspaceFolder}", "args": [ @@ -19,7 +19,7 @@ "name": "Fix Code on Open File", "type": "python", "request": "launch", - "module": "esbmc_ai_lib", + "module": "esbmc_ai", "justMyCode": true, "cwd": "${workspaceFolder}", "args": [ @@ -33,7 +33,7 @@ "name": "Optimize Code on Open File", "type": "python", "request": "launch", - "module": "esbmc_ai_lib", + "module": "esbmc_ai", "justMyCode": true, "cwd": "${workspaceFolder}", "args": [ diff --git a/build.sh b/build.sh new file mode 100755 index 0000000..8ba0dee --- /dev/null +++ b/build.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env sh + +echo "Building ESBMC-AI" +python3 -m build + diff --git a/esbmc-ai b/esbmc-ai index d97f503..937d073 100755 --- a/esbmc-ai +++ b/esbmc-ai @@ -1,3 +1,3 @@ #!/usr/bin/env sh -python -m esbmc_ai_lib $@ +python -m esbmc_ai $@ diff --git a/esbmc_ai.py b/esbmc_ai.py deleted file mode 100755 index aeb8922..0000000 --- a/esbmc_ai.py +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python3 - -# Author: Yiannis Charalambous 2023 - -from esbmc_ai_lib.__main__ import main - -main() diff --git a/esbmc_ai/__about__.py b/esbmc_ai/__about__.py new file mode 100644 index 0000000..18ab2f0 --- /dev/null +++ b/esbmc_ai/__about__.py @@ -0,0 +1,4 @@ +# Author: Yiannis Charalambous + +__version__: str = "v0.4.0" +__author__: str = "Yiannis Charalambous" diff --git a/esbmc_ai_lib/__init__.py b/esbmc_ai/__init__.py similarity index 76% rename from esbmc_ai_lib/__init__.py rename to esbmc_ai/__init__.py index 752f9f8..ca668db 100644 --- a/esbmc_ai_lib/__init__.py +++ b/esbmc_ai/__init__.py @@ -2,4 +2,4 @@ Software Bounded Model Checker. 
With the power of LLMs, it adds features such as automatic code fixing and more.""" -version: str = "0.4.0" +from esbmc_ai.__about__ import __version__, __author__ diff --git a/esbmc_ai_lib/__main__.py b/esbmc_ai/__main__.py similarity index 94% rename from esbmc_ai_lib/__main__.py rename to esbmc_ai/__main__.py index 96f3e96..f2316df 100755 --- a/esbmc_ai_lib/__main__.py +++ b/esbmc_ai/__main__.py @@ -13,27 +13,29 @@ import argparse from langchain.base_language import BaseLanguageModel -import esbmc_ai_lib.config as config -from esbmc_ai_lib.frontend.solution import ( + +import esbmc_ai.config as config +from esbmc_ai import __author__, __version__ +from esbmc_ai.frontend.solution import ( SourceFile, get_main_source_file, set_main_source_file, get_main_source_file_path, ) -from esbmc_ai_lib.commands import ( +from esbmc_ai.commands import ( ChatCommand, FixCodeCommand, HelpCommand, ExitCommand, ) -from esbmc_ai_lib.loading_widget import LoadingWidget, create_loading_widget -from esbmc_ai_lib.user_chat import UserChat -from esbmc_ai_lib.logging import printv, printvv -from esbmc_ai_lib.esbmc_util import esbmc -from esbmc_ai_lib.chat_response import FinishReason, ChatResponse -from esbmc_ai_lib.ai_models import _ai_model_names +from esbmc_ai.loading_widget import LoadingWidget, create_loading_widget +from esbmc_ai.user_chat import UserChat +from esbmc_ai.logging import printv, printvv +from esbmc_ai.esbmc_util import esbmc +from esbmc_ai.chat_response import FinishReason, ChatResponse +from esbmc_ai.ai_models import _ai_model_names commands: list[ChatCommand] = [] @@ -197,7 +199,7 @@ def main() -> None: # argparse.RawDescriptionHelpFormatter allows the ESBMC_HELP to display # properly. formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=f"Made by Yiannis Charalambous\n\n{ESBMC_HELP}", + epilog=f"Made by {__author__}\n\n{ESBMC_HELP}", ) parser.add_argument( @@ -211,6 +213,14 @@ def main() -> None: help="Any arguments after the filename will be passed to ESBMC as parameters.", ) + parser.add_argument( + "-V", + "--version", + action="version", + version="%(prog)s {version}".format(version=__version__), + help="Show version information.", + ) + parser.add_argument( "-v", "--verbose", @@ -249,7 +259,7 @@ def main() -> None: args = parser.parse_args() print("ESBMC-AI") - print("Made by Yiannis Charalambous") + print(f"Made by {__author__}") print() init_check_health(args.verbose) diff --git a/esbmc_ai_lib/ai_models.py b/esbmc_ai/ai_models.py similarity index 99% rename from esbmc_ai_lib/ai_models.py rename to esbmc_ai/ai_models.py index 19f91b6..e0f718e 100644 --- a/esbmc_ai_lib/ai_models.py +++ b/esbmc_ai/ai_models.py @@ -23,7 +23,7 @@ ) -from esbmc_ai_lib.api_key_collection import APIKeyCollection +from esbmc_ai.api_key_collection import APIKeyCollection class AIModel(object): diff --git a/esbmc_ai_lib/api_key_collection.py b/esbmc_ai/api_key_collection.py similarity index 100% rename from esbmc_ai_lib/api_key_collection.py rename to esbmc_ai/api_key_collection.py diff --git a/esbmc_ai_lib/base_chat_interface.py b/esbmc_ai/base_chat_interface.py similarity index 100% rename from esbmc_ai_lib/base_chat_interface.py rename to esbmc_ai/base_chat_interface.py diff --git a/esbmc_ai_lib/chat_response.py b/esbmc_ai/chat_response.py similarity index 100% rename from esbmc_ai_lib/chat_response.py rename to esbmc_ai/chat_response.py diff --git a/esbmc_ai_lib/commands/__init__.py b/esbmc_ai/commands/__init__.py similarity index 100% rename from esbmc_ai_lib/commands/__init__.py rename to 
esbmc_ai/commands/__init__.py diff --git a/esbmc_ai_lib/commands/chat_command.py b/esbmc_ai/commands/chat_command.py similarity index 100% rename from esbmc_ai_lib/commands/chat_command.py rename to esbmc_ai/commands/chat_command.py diff --git a/esbmc_ai_lib/commands/exit_command.py b/esbmc_ai/commands/exit_command.py similarity index 100% rename from esbmc_ai_lib/commands/exit_command.py rename to esbmc_ai/commands/exit_command.py diff --git a/esbmc_ai_lib/commands/fix_code_command.py b/esbmc_ai/commands/fix_code_command.py similarity index 99% rename from esbmc_ai_lib/commands/fix_code_command.py rename to esbmc_ai/commands/fix_code_command.py index f0f4868..44f24fb 100644 --- a/esbmc_ai_lib/commands/fix_code_command.py +++ b/esbmc_ai/commands/fix_code_command.py @@ -6,7 +6,7 @@ from typing_extensions import override from langchain.schema import AIMessage, HumanMessage -from esbmc_ai_lib.chat_response import FinishReason +from esbmc_ai.chat_response import FinishReason from .chat_command import ChatCommand from .. import config diff --git a/esbmc_ai_lib/commands/help_command.py b/esbmc_ai/commands/help_command.py similarity index 100% rename from esbmc_ai_lib/commands/help_command.py rename to esbmc_ai/commands/help_command.py diff --git a/esbmc_ai_lib/commands/verify_code_command.py b/esbmc_ai/commands/verify_code_command.py similarity index 100% rename from esbmc_ai_lib/commands/verify_code_command.py rename to esbmc_ai/commands/verify_code_command.py diff --git a/esbmc_ai_lib/config.py b/esbmc_ai/config.py similarity index 100% rename from esbmc_ai_lib/config.py rename to esbmc_ai/config.py diff --git a/esbmc_ai_lib/esbmc_util.py b/esbmc_ai/esbmc_util.py similarity index 100% rename from esbmc_ai_lib/esbmc_util.py rename to esbmc_ai/esbmc_util.py diff --git a/esbmc_ai_lib/frontend/__init__.py b/esbmc_ai/frontend/__init__.py similarity index 100% rename from esbmc_ai_lib/frontend/__init__.py rename to esbmc_ai/frontend/__init__.py diff --git a/esbmc_ai_lib/frontend/solution.py b/esbmc_ai/frontend/solution.py similarity index 100% rename from esbmc_ai_lib/frontend/solution.py rename to esbmc_ai/frontend/solution.py diff --git a/esbmc_ai_lib/loading_widget.py b/esbmc_ai/loading_widget.py similarity index 98% rename from esbmc_ai_lib/loading_widget.py rename to esbmc_ai/loading_widget.py index c0c37af..fd4f3fb 100644 --- a/esbmc_ai_lib/loading_widget.py +++ b/esbmc_ai/loading_widget.py @@ -12,7 +12,7 @@ from threading import Thread from typing import Optional -from esbmc_ai_lib import config +from esbmc_ai import config class LoadingWidget(object): diff --git a/esbmc_ai_lib/logging.py b/esbmc_ai/logging.py similarity index 100% rename from esbmc_ai_lib/logging.py rename to esbmc_ai/logging.py diff --git a/esbmc_ai_lib/msg_bus.py b/esbmc_ai/msg_bus.py similarity index 100% rename from esbmc_ai_lib/msg_bus.py rename to esbmc_ai/msg_bus.py diff --git a/esbmc_ai_lib/optimize_code.py b/esbmc_ai/optimize_code.py similarity index 95% rename from esbmc_ai_lib/optimize_code.py rename to esbmc_ai/optimize_code.py index ce1015f..092c2ce 100644 --- a/esbmc_ai_lib/optimize_code.py +++ b/esbmc_ai/optimize_code.py @@ -3,7 +3,7 @@ from langchain.base_language import BaseLanguageModel from langchain.schema import AIMessage, BaseMessage, HumanMessage -from esbmc_ai_lib.config import ChatPromptSettings +from esbmc_ai.config import ChatPromptSettings from .base_chat_interface import BaseChatInterface, ChatResponse from .ai_models import AIModel diff --git a/esbmc_ai_lib/solution_generator.py 
b/esbmc_ai/solution_generator.py similarity index 94% rename from esbmc_ai_lib/solution_generator.py rename to esbmc_ai/solution_generator.py index c02a82e..ef2a8af 100644 --- a/esbmc_ai_lib/solution_generator.py +++ b/esbmc_ai/solution_generator.py @@ -4,8 +4,8 @@ from langchain.base_language import BaseLanguageModel from langchain.schema import BaseMessage -from esbmc_ai_lib.chat_response import ChatResponse, FinishReason -from esbmc_ai_lib.config import DynamicAIModelAgent +from esbmc_ai.chat_response import ChatResponse, FinishReason +from esbmc_ai.config import DynamicAIModelAgent from .ai_models import AIModel from .base_chat_interface import BaseChatInterface diff --git a/esbmc_ai_lib/user_chat.py b/esbmc_ai/user_chat.py similarity index 97% rename from esbmc_ai_lib/user_chat.py rename to esbmc_ai/user_chat.py index 320a09e..7b51e40 100644 --- a/esbmc_ai_lib/user_chat.py +++ b/esbmc_ai/user_chat.py @@ -6,7 +6,7 @@ from langchain.memory import ConversationSummaryMemory, ChatMessageHistory from langchain.schema import BaseMessage, PromptValue, SystemMessage -from esbmc_ai_lib.config import AIAgentConversation, ChatPromptSettings +from esbmc_ai.config import AIAgentConversation, ChatPromptSettings from .ai_models import AIModel from .base_chat_interface import BaseChatInterface diff --git a/notebooks/ast.ipynb b/notebooks/ast.ipynb index 0f58a32..cd05e5f 100644 --- a/notebooks/ast.ipynb +++ b/notebooks/ast.ipynb @@ -1,435 +1,435 @@ { - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# Test LLVM AST Notebook\n", - "\n", - "## Author: Yiannis Charalambous\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from clang.cindex import Config\n", - "import clang.native\n", - "import clang.cindex\n", - "import sys\n", - "from typing import NamedTuple\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Connect the Python API of Clang to the libclang.so file bundled in the libclang PyPI package.\n", - "Config.library_file = os.path.join(\n", - " os.path.dirname(clang.native.__file__),\n", - " \"libclang.so\",\n", - ")\n", - "\n", - "module_path = os.path.abspath(os.path.join(\"..\"))\n", - "if module_path not in sys.path:\n", - " sys.path.append(module_path)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Found a [line=4, col=5]\n", - " Token int TokenKind.KEYWORD\n", - " Token a TokenKind.IDENTIFIER\n", - "Start: 42, End: 47, Range: 5\n", - "\n", - "Found b [line=4, col=8]\n", - " Token int TokenKind.KEYWORD\n", - " Token a TokenKind.IDENTIFIER\n", - " Token , TokenKind.PUNCTUATION\n", - " Token b TokenKind.IDENTIFIER\n", - "Start: 42, End: 50, Range: 8\n", - "\n", - "Found __VERIFIER_atomic_acquire [line=5, col=6]\n", - " Token void TokenKind.KEYWORD\n", - " Token __VERIFIER_atomic_acquire TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token void TokenKind.KEYWORD\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token { TokenKind.PUNCTUATION\n", - " Token __VERIFIER_assume TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token a TokenKind.IDENTIFIER\n", - " Token == TokenKind.PUNCTUATION\n", - " Token 0 TokenKind.LITERAL\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token a 
TokenKind.IDENTIFIER\n", - " Token = TokenKind.PUNCTUATION\n", - " Token 1 TokenKind.LITERAL\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token } TokenKind.PUNCTUATION\n", - "Start: 52, End: 134, Range: 82\n", - "\n", - "Found c [line=10, col=7]\n", - " Token void TokenKind.KEYWORD\n", - " Token * TokenKind.PUNCTUATION\n", - " Token c TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token void TokenKind.KEYWORD\n", - " Token * TokenKind.PUNCTUATION\n", - " Token arg TokenKind.IDENTIFIER\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token { TokenKind.PUNCTUATION\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token __VERIFIER_atomic_acquire TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token b TokenKind.IDENTIFIER\n", - " Token = TokenKind.PUNCTUATION\n", - " Token 1 TokenKind.LITERAL\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token return TokenKind.KEYWORD\n", - " Token NULL TokenKind.IDENTIFIER\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token } TokenKind.PUNCTUATION\n", - "Start: 135, End: 224, Range: 89\n", - "\n", - "Found d [line=17, col=11]\n", - " Token pthread_t TokenKind.IDENTIFIER\n", - " Token d TokenKind.IDENTIFIER\n", - "Start: 225, End: 236, Range: 11\n", - "\n", - "Found main [line=18, col=5]\n", - " Token int TokenKind.KEYWORD\n", - " Token main TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token { TokenKind.PUNCTUATION\n", - " Token pthread_create TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token & TokenKind.PUNCTUATION\n", - " Token d TokenKind.IDENTIFIER\n", - " Token , TokenKind.PUNCTUATION\n", - " Token 0 TokenKind.LITERAL\n", - " Token , TokenKind.PUNCTUATION\n", - " Token c TokenKind.IDENTIFIER\n", - " Token , TokenKind.PUNCTUATION\n", - " Token 0 TokenKind.LITERAL\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token __VERIFIER_atomic_acquire TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token if TokenKind.KEYWORD\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token ! 
TokenKind.PUNCTUATION\n", - " Token b TokenKind.IDENTIFIER\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token assert TokenKind.IDENTIFIER\n", - " Token ( TokenKind.PUNCTUATION\n", - " Token 0 TokenKind.LITERAL\n", - " Token ) TokenKind.PUNCTUATION\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token return TokenKind.KEYWORD\n", - " Token 0 TokenKind.LITERAL\n", - " Token ; TokenKind.PUNCTUATION\n", - " Token } TokenKind.PUNCTUATION\n", - "Start: 238, End: 363, Range: 125\n", - "\n", - "Total 6\n" - ] - } - ], - "source": [ - "FILE = \"../samples/threading.c\"\n", - "\n", - "\n", - "def get_declarations_local(root: clang.cindex.Cursor) -> list[clang.cindex.Cursor]:\n", - " declarations: list[clang.cindex.Cursor] = []\n", - " declarations_raw: set[str] = {}\n", - " tokens: list[clang.cindex.Token] = []\n", - " # Scan all direct symbols in root.\n", - " for child in root.get_children():\n", - " # print(f\"Scanning: {child.spelling}\")\n", - " node: clang.cindex.Cursor = child\n", - " kind: clang.cindex.CursorKind = node.kind\n", - " # Check if it is actually from the file.\n", - " if (\n", - " kind.is_declaration()\n", - " and node.storage_class == clang.cindex.StorageClass.NONE\n", - " ):\n", - " print(\n", - " f\"Found {node.spelling} [line={node.location.line}, col={node.location.column}]\"\n", - " )\n", - " tokens: clang.cindex.Token = node.get_tokens()\n", - " for token in tokens:\n", - " print(f\" Token {token.spelling} {token.kind}\")\n", - " loc: clang.cindex.SourceRange = node.extent\n", - " end: clang.cindex.SourceLocation = loc.end\n", - " start: clang.cindex.SourceLocation = loc.start\n", - " print(\n", - " f\"Start: {start.offset}, End: {end.offset}, Range: {end.offset - start.offset}\"\n", - " )\n", - " print()\n", - " declarations.append(node)\n", - " return declarations\n", - "\n", - "\n", - "index: clang.cindex.Index = clang.cindex.Index.create()\n", - "tu: clang.cindex.TranslationUnit = index.parse(FILE)\n", - "root: clang.cindex.Cursor = tu.cursor\n", - "declarations: clang.cindex.Cursor = get_declarations_local(root)\n", - "\n", - "print(f\"Total {len(declarations)}\")\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Reversing Reach AST To Source Code\n", - "\n", - "The only issue I have found, multiple declarations in one statement need to be recognized and the nodes combined:\n", - "\n", - "```c\n", - "int a, b;\n", - "```\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Code for a:\n", - "```\n", - "int a\n", - "```\n", - "\n", - "Code for b:\n", - "```\n", - "int a, b\n", - "```\n", - "\n", - "Code for __VERIFIER_atomic_acquire():\n", - "```\n", - "void __VERIFIER_atomic_acquire(void)\n", - "{\n", - " __VERIFIER_assume(a == 0);\n", - " a = 1;\n", - "}\n", - "```\n", - "\n", - "Code for c(void *):\n", - "```\n", - "void *c(void *arg)\n", - "{\n", - " ;\n", - " __VERIFIER_atomic_acquire();\n", - " b = 1;\n", - " return NULL;\n", - "}\n", - "```\n", - "\n", - "Code for d:\n", - "```\n", - "pthread_t d\n", - "```\n", - "\n", - "Code for main():\n", - "```\n", - "int main()\n", - "{\n", - " pthread_create(&d, 0, c, 0);\n", - " __VERIFIER_atomic_acquire();\n", - " if (!b)\n", - " assert(0);\n", - " return 0;\n", - "}\n", - "```\n", - "\n" - ] - } - ], - "source": [ - "with open(FILE) as file:\n", - " source_code: str = file.read()\n", - "\n", - "\n", - "def get_node_source_code(source_code: str, 
node: clang.cindex.Cursor) -> str:\n", - " loc: clang.cindex.SourceRange = node.extent\n", - " start: clang.cindex.SourceLocation = loc.start\n", - " end: clang.cindex.SourceLocation = loc.end\n", - " return source_code[start.offset : end.offset]\n", - "\n", - "\n", - "for node in declarations:\n", - " print(f\"Code for {node.displayname}:\")\n", - " print(\"```\")\n", - " print(get_node_source_code(source_code, node))\n", - " print(\"```\")\n", - " print()\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test Code\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from esbmc_ai_lib.frontend.ast import ClangAST\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "__VERIFIER_atomic_acquire()\n", - "\n", - "\n", - "\n", - "c(arg: void *)\n", - "\n", - "\n", - "\n", - "main()\n", - "\n", - "\n", - "\n" - ] - } - ], - "source": [ - "file = \"../samples/threading.c\"\n", - "cast = ClangAST(file)\n", - "functions = cast.get_fn_decl()\n", - "\n", - "for fn in functions:\n", - " print(str(fn) + \"\\n\")\n", - " # Seems like different cursors have the same translation unit...\n", - " print(fn.cursor)\n", - " print(fn.cursor.translation_unit)\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test Code 2\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "struct linear {value: int}\n", - "typedef (LinearTypeDef) struct linear {struct linear: struct linear}\n", - "Point {x: int, y: int}\n", - "typedef (Point) Point {Point: Point}\n", - "enum Types {ONE: int, TWO: int, THREE: int}\n", - "typedef (Typest) enum Types {Types: enum Types}\n", - "union Combines {a: int, b: int, c: int}\n", - "typedef (CombinesTypeDef) union Combines {union Combines: union Combines}\n" - ] - } - ], - "source": [ - "file = \"./samples/typedefs.c\"\n", - "cast = ClangAST(file)\n", - "functions = cast.get_type_decl()\n", - "\n", - "for fn in functions:\n", - " print(fn)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#include \"/usr/include/stdlib.h\"\n", - "#include \"/usr/include/assert.h\"\n" - ] - } - ], - "source": [ - "file = \"./samples/typedefs.c\"\n", - "cast: ClangAST = ClangAST(file)\n", - "includes = cast.get_include_directives()\n", - "\n", - "for include in includes:\n", - " print(include)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "esbmc-ai-awqrJrdH", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Test LLVM AST Notebook\n", + "\n", + "## Author: Yiannis Charalambous\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from clang.cindex import Config\n", + "import clang.native\n", + "import clang.cindex\n", 
+ "import sys\n", + "from typing import NamedTuple\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Connect the Python API of Clang to the libclang.so file bundled in the libclang PyPI package.\n", + "Config.library_file = os.path.join(\n", + " os.path.dirname(clang.native.__file__),\n", + " \"libclang.so\",\n", + ")\n", + "\n", + "module_path = os.path.abspath(os.path.join(\"..\"))\n", + "if module_path not in sys.path:\n", + " sys.path.append(module_path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found a [line=4, col=5]\n", + " Token int TokenKind.KEYWORD\n", + " Token a TokenKind.IDENTIFIER\n", + "Start: 42, End: 47, Range: 5\n", + "\n", + "Found b [line=4, col=8]\n", + " Token int TokenKind.KEYWORD\n", + " Token a TokenKind.IDENTIFIER\n", + " Token , TokenKind.PUNCTUATION\n", + " Token b TokenKind.IDENTIFIER\n", + "Start: 42, End: 50, Range: 8\n", + "\n", + "Found __VERIFIER_atomic_acquire [line=5, col=6]\n", + " Token void TokenKind.KEYWORD\n", + " Token __VERIFIER_atomic_acquire TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token void TokenKind.KEYWORD\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token { TokenKind.PUNCTUATION\n", + " Token __VERIFIER_assume TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token a TokenKind.IDENTIFIER\n", + " Token == TokenKind.PUNCTUATION\n", + " Token 0 TokenKind.LITERAL\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token a TokenKind.IDENTIFIER\n", + " Token = TokenKind.PUNCTUATION\n", + " Token 1 TokenKind.LITERAL\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token } TokenKind.PUNCTUATION\n", + "Start: 52, End: 134, Range: 82\n", + "\n", + "Found c [line=10, col=7]\n", + " Token void TokenKind.KEYWORD\n", + " Token * TokenKind.PUNCTUATION\n", + " Token c TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token void TokenKind.KEYWORD\n", + " Token * TokenKind.PUNCTUATION\n", + " Token arg TokenKind.IDENTIFIER\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token { TokenKind.PUNCTUATION\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token __VERIFIER_atomic_acquire TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token b TokenKind.IDENTIFIER\n", + " Token = TokenKind.PUNCTUATION\n", + " Token 1 TokenKind.LITERAL\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token return TokenKind.KEYWORD\n", + " Token NULL TokenKind.IDENTIFIER\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token } TokenKind.PUNCTUATION\n", + "Start: 135, End: 224, Range: 89\n", + "\n", + "Found d [line=17, col=11]\n", + " Token pthread_t TokenKind.IDENTIFIER\n", + " Token d TokenKind.IDENTIFIER\n", + "Start: 225, End: 236, Range: 11\n", + "\n", + "Found main [line=18, col=5]\n", + " Token int TokenKind.KEYWORD\n", + " Token main TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token { TokenKind.PUNCTUATION\n", + " Token pthread_create TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token & TokenKind.PUNCTUATION\n", + " Token d TokenKind.IDENTIFIER\n", + " Token , TokenKind.PUNCTUATION\n", + " Token 0 TokenKind.LITERAL\n", + " Token , TokenKind.PUNCTUATION\n", + " Token c TokenKind.IDENTIFIER\n", + " Token , 
TokenKind.PUNCTUATION\n", + " Token 0 TokenKind.LITERAL\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token __VERIFIER_atomic_acquire TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token if TokenKind.KEYWORD\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token ! TokenKind.PUNCTUATION\n", + " Token b TokenKind.IDENTIFIER\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token assert TokenKind.IDENTIFIER\n", + " Token ( TokenKind.PUNCTUATION\n", + " Token 0 TokenKind.LITERAL\n", + " Token ) TokenKind.PUNCTUATION\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token return TokenKind.KEYWORD\n", + " Token 0 TokenKind.LITERAL\n", + " Token ; TokenKind.PUNCTUATION\n", + " Token } TokenKind.PUNCTUATION\n", + "Start: 238, End: 363, Range: 125\n", + "\n", + "Total 6\n" + ] + } + ], + "source": [ + "FILE = \"../samples/threading.c\"\n", + "\n", + "\n", + "def get_declarations_local(root: clang.cindex.Cursor) -> list[clang.cindex.Cursor]:\n", + " declarations: list[clang.cindex.Cursor] = []\n", + " declarations_raw: set[str] = {}\n", + " tokens: list[clang.cindex.Token] = []\n", + " # Scan all direct symbols in root.\n", + " for child in root.get_children():\n", + " # print(f\"Scanning: {child.spelling}\")\n", + " node: clang.cindex.Cursor = child\n", + " kind: clang.cindex.CursorKind = node.kind\n", + " # Check if it is actually from the file.\n", + " if (\n", + " kind.is_declaration()\n", + " and node.storage_class == clang.cindex.StorageClass.NONE\n", + " ):\n", + " print(\n", + " f\"Found {node.spelling} [line={node.location.line}, col={node.location.column}]\"\n", + " )\n", + " tokens: clang.cindex.Token = node.get_tokens()\n", + " for token in tokens:\n", + " print(f\" Token {token.spelling} {token.kind}\")\n", + " loc: clang.cindex.SourceRange = node.extent\n", + " end: clang.cindex.SourceLocation = loc.end\n", + " start: clang.cindex.SourceLocation = loc.start\n", + " print(\n", + " f\"Start: {start.offset}, End: {end.offset}, Range: {end.offset - start.offset}\"\n", + " )\n", + " print()\n", + " declarations.append(node)\n", + " return declarations\n", + "\n", + "\n", + "index: clang.cindex.Index = clang.cindex.Index.create()\n", + "tu: clang.cindex.TranslationUnit = index.parse(FILE)\n", + "root: clang.cindex.Cursor = tu.cursor\n", + "declarations: clang.cindex.Cursor = get_declarations_local(root)\n", + "\n", + "print(f\"Total {len(declarations)}\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## Reversing Reach AST To Source Code\n", + "\n", + "The only issue I have found, multiple declarations in one statement need to be recognized and the nodes combined:\n", + "\n", + "```c\n", + "int a, b;\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Code for a:\n", + "```\n", + "int a\n", + "```\n", + "\n", + "Code for b:\n", + "```\n", + "int a, b\n", + "```\n", + "\n", + "Code for __VERIFIER_atomic_acquire():\n", + "```\n", + "void __VERIFIER_atomic_acquire(void)\n", + "{\n", + " __VERIFIER_assume(a == 0);\n", + " a = 1;\n", + "}\n", + "```\n", + "\n", + "Code for c(void *):\n", + "```\n", + "void *c(void *arg)\n", + "{\n", + " ;\n", + " __VERIFIER_atomic_acquire();\n", + " b = 1;\n", + " return NULL;\n", + "}\n", + "```\n", + "\n", + "Code for d:\n", + "```\n", + "pthread_t d\n", 
+ "```\n", + "\n", + "Code for main():\n", + "```\n", + "int main()\n", + "{\n", + " pthread_create(&d, 0, c, 0);\n", + " __VERIFIER_atomic_acquire();\n", + " if (!b)\n", + " assert(0);\n", + " return 0;\n", + "}\n", + "```\n", + "\n" + ] + } + ], + "source": [ + "with open(FILE) as file:\n", + " source_code: str = file.read()\n", + "\n", + "\n", + "def get_node_source_code(source_code: str, node: clang.cindex.Cursor) -> str:\n", + " loc: clang.cindex.SourceRange = node.extent\n", + " start: clang.cindex.SourceLocation = loc.start\n", + " end: clang.cindex.SourceLocation = loc.end\n", + " return source_code[start.offset : end.offset]\n", + "\n", + "\n", + "for node in declarations:\n", + " print(f\"Code for {node.displayname}:\")\n", + " print(\"```\")\n", + " print(get_node_source_code(source_code, node))\n", + " print(\"```\")\n", + " print()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from esbmc_ai.frontend.ast import ClangAST\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "__VERIFIER_atomic_acquire()\n", + "\n", + "\n", + "\n", + "c(arg: void *)\n", + "\n", + "\n", + "\n", + "main()\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "file = \"../samples/threading.c\"\n", + "cast = ClangAST(file)\n", + "functions = cast.get_fn_decl()\n", + "\n", + "for fn in functions:\n", + " print(str(fn) + \"\\n\")\n", + " # Seems like different cursors have the same translation unit...\n", + " print(fn.cursor)\n", + " print(fn.cursor.translation_unit)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Code 2\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "struct linear {value: int}\n", + "typedef (LinearTypeDef) struct linear {struct linear: struct linear}\n", + "Point {x: int, y: int}\n", + "typedef (Point) Point {Point: Point}\n", + "enum Types {ONE: int, TWO: int, THREE: int}\n", + "typedef (Typest) enum Types {Types: enum Types}\n", + "union Combines {a: int, b: int, c: int}\n", + "typedef (CombinesTypeDef) union Combines {union Combines: union Combines}\n" + ] + } + ], + "source": [ + "file = \"./samples/typedefs.c\"\n", + "cast = ClangAST(file)\n", + "functions = cast.get_type_decl()\n", + "\n", + "for fn in functions:\n", + " print(fn)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#include \"/usr/include/stdlib.h\"\n", + "#include \"/usr/include/assert.h\"\n" + ] + } + ], + "source": [ + "file = \"./samples/typedefs.c\"\n", + "cast: ClangAST = ClangAST(file)\n", + "includes = cast.get_include_directives()\n", + "\n", + "for include in includes:\n", + " print(include)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "esbmc-ai-awqrJrdH", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end 
of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b027c56 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,70 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "esbmc_ai" +# https://hatch.pypa.io/latest/version/ +version = "0.4.0dev0" +# dynamic = ["version"] +authors = [ + { name="Yiannis Charalambous", email="yiannis128@hotmail.com" }, +] +description = "LLM driven development and automatic repair kit." +readme = "README.md" +license-files = { paths = ["LICENSE"] } +requires-python = ">=3.10" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "License :: OSI Approved :: GNU Affero General Public License v3", +] + +keywords = [ + "esbmc", "LLM", "AI", "automated code repair", +] + +dependencies = [ + "openai", + "python-dotenv", + "tiktoken", + "aiohttp", + "aiosignal", + "async-timeout", + "attrs", + "certifi", + "charset-normalizer", + "frozenlist", + "idna", + "multidict", + "regex", + "requests", + "urllib3", + "yarl", + "libclang", + "clang", + "langchain", + "langchain-openai", +] + +#[project.optional-dependencies] +#option1 = [ +# "...", +#] +#option2 = [ +# "...", +#] + +[project.scripts] +esbmc_ai = "esbmc_ai.__main__:main" + +[project.urls] +Homepage = "https://github.com/Yiannis128/esbmc-ai" +"Source code" = "https://github.com/Yiannis128/esbmc-ai" +Documentation = "https://github.com/Yiannis128/esbmc-ai/wiki" +Issues = "https://github.com/Yiannis128/esbmc-ai/issues" + +# [tool.hatch.version] +# path = "__about__.py" + + diff --git a/tests/regtest/test_base_chat_interface.py b/tests/regtest/test_base_chat_interface.py index bd49ab1..f7d3db5 100644 --- a/tests/regtest/test_base_chat_interface.py +++ b/tests/regtest/test_base_chat_interface.py @@ -5,10 +5,10 @@ from langchain.llms.fake import FakeListLLM from langchain.schema import BaseMessage, HumanMessage, AIMessage, SystemMessage -from esbmc_ai_lib.ai_models import AIModel -from esbmc_ai_lib.chat_response import ChatResponse -from esbmc_ai_lib.base_chat_interface import BaseChatInterface -from esbmc_ai_lib.config import AIAgentConversation, ChatPromptSettings +from esbmc_ai.ai_models import AIModel +from esbmc_ai.chat_response import ChatResponse +from esbmc_ai.base_chat_interface import BaseChatInterface +from esbmc_ai.config import AIAgentConversation, ChatPromptSettings @pytest.fixture diff --git a/tests/test_ai_models.py b/tests/test_ai_models.py index ef41a3d..e9bdba6 100644 --- a/tests/test_ai_models.py +++ b/tests/test_ai_models.py @@ -9,7 +9,7 @@ ) from pytest import raises -from esbmc_ai_lib.ai_models import ( +from esbmc_ai.ai_models import ( add_custom_ai_model, is_valid_ai_model, AIModel, diff --git a/tests/test_base_chat_interface.py b/tests/test_base_chat_interface.py index 7079634..1b60a18 100644 --- a/tests/test_base_chat_interface.py +++ b/tests/test_base_chat_interface.py @@ -4,10 +4,10 @@ from langchain.llms.fake import FakeListLLM from langchain.schema import AIMessage, BaseMessage, HumanMessage, SystemMessage -from esbmc_ai_lib.ai_models import AIModel -from esbmc_ai_lib.base_chat_interface import BaseChatInterface -from esbmc_ai_lib.chat_response import ChatResponse -from esbmc_ai_lib.config import AIAgentConversation, ChatPromptSettings +from esbmc_ai.ai_models import AIModel +from esbmc_ai.base_chat_interface import BaseChatInterface +from esbmc_ai.chat_response import ChatResponse +from esbmc_ai.config import AIAgentConversation, ChatPromptSettings 
@pytest.fixture(scope="module") diff --git a/tests/test_command_parser.py b/tests/test_command_parser.py index 1ef6bd2..e6908e3 100644 --- a/tests/test_command_parser.py +++ b/tests/test_command_parser.py @@ -1,4 +1,4 @@ -from esbmc_ai_lib.__main__ import parse_command +from esbmc_ai.__main__ import parse_command def test_parse() -> None: diff --git a/tests/test_config.py b/tests/test_config.py index fb595c1..6fa3741 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,9 +2,9 @@ from pytest import raises -import esbmc_ai_lib.config as config +import esbmc_ai.config as config -from esbmc_ai_lib.ai_models import is_valid_ai_model +from esbmc_ai.ai_models import is_valid_ai_model def test_load_config_value() -> None: diff --git a/tests/test_user_chat.py b/tests/test_user_chat.py index 9d0d96d..3c2eca1 100644 --- a/tests/test_user_chat.py +++ b/tests/test_user_chat.py @@ -5,10 +5,10 @@ from langchain.llms.fake import FakeListLLM from langchain.schema import AIMessage, SystemMessage -from esbmc_ai_lib.ai_models import AIModel -from esbmc_ai_lib.chat_response import ChatResponse, FinishReason -from esbmc_ai_lib.config import AIAgentConversation, ChatPromptSettings -from esbmc_ai_lib.user_chat import UserChat +from esbmc_ai.ai_models import AIModel +from esbmc_ai.chat_response import ChatResponse, FinishReason +from esbmc_ai.config import AIAgentConversation, ChatPromptSettings +from esbmc_ai.user_chat import UserChat @pytest.fixture diff --git a/upload.sh b/upload.sh new file mode 100755 index 0000000..73274f2 --- /dev/null +++ b/upload.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env sh + + +echo "Upload to PyPi" + +while true; do + read -p "Choose repo (pypi, testpypi): " -r choice + case "$choice" in + "pypi"|"testpypi") + break + ;; + *) + echo "Wrong option" + ;; + esac +done + +echo "For username, type __token__ if you want to use a token. You will only be asked if the information is not in the ~/.pypi file." + +python3 -m twine upload --skip-existing --repository pypi dist/* +
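
Migration note for downstream imports (a minimal sketch, not part of the patch; it assumes the esbmc-ai package from this branch is installed, and the helper function below is illustrative only):

# Old import paths (pre-rename):
#     from esbmc_ai_lib import version
#     from esbmc_ai_lib.esbmc_util import esbmc
# New import paths after this change:
#     from esbmc_ai.esbmc_util import esbmc
from esbmc_ai import __version__, __author__  # re-exported from esbmc_ai.__about__


def print_banner() -> None:
    """Illustrative helper mirroring the banner printed by esbmc_ai.__main__.main()."""
    print("ESBMC-AI")
    print(f"Made by {__author__}")
    print(f"Version {__version__}")


if __name__ == "__main__":
    print_banner()

The same rename applies on the command line: the esbmc-ai wrapper script now runs "python -m esbmc_ai", and the new -V/--version flag reports the version defined in esbmc_ai/__about__.py.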