Skip to content

Commit 673db14

Browse files
authored
Merge pull request #76 from FalkorDB/staging
Staging
2 parents 277cdae + 7589bdd commit 673db14

File tree

8 files changed

+214
-107
lines changed

8 files changed

+214
-107
lines changed

api/analyzers/analyzer.py

+45-3
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,54 @@ def find_parent(self, node: Node, parent_types: list) -> Node:
1616
while node and node.type not in parent_types:
1717
node = node.parent
1818
return node
19+
20+
@abstractmethod
def is_dependency(self, file_path: str) -> bool:
    """
    Tell whether a file belongs to a dependency.

    Concrete analyzers provide the actual check; this declaration only fixes
    the signature.

    Args:
        file_path (str): The file path.

    Returns:
        bool: True if the file is a dependency, False otherwise.
    """
    ...
33+
34+
@abstractmethod
def resolve_path(self, file_path: str, path: Path) -> str:
    """
    Resolve the path of the file.

    Concrete analyzers provide the actual mapping; this declaration only fixes
    the signature.

    Args:
        file_path (str): The file path.
        path (Path): The path to the folder.

    Returns:
        str: The resolved path.
    """
    ...
48+
49+
def resolve(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[tuple[File, Node]]:
    """
    Resolve the definition(s) of the symbol at `node` through the language server.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `node`.
        path (Path): The path to the folder, forwarded to `resolve_path`.
        node (Node): The node whose definition is requested.

    Returns:
        list[tuple[File, Node]]: For every reported location that maps to a
        known file, that file and the node covering the reported range.
        Empty when nothing matches or the lookup fails.
    """
    try:
        locations = lsp.request_definition(str(file_path), node.start_point.row, node.start_point.column)
        resolved = []
        for location in locations:
            if not location:
                continue
            # Map the reported location once and reuse it for the lookup.
            target = Path(self.resolve_path(location['absolutePath'], path))
            if target not in files:
                continue
            file = files[target]
            start = location['range']['start']
            end = location['range']['end']
            definition = file.tree.root_node.descendant_for_point_range(
                Point(start['line'], start['character']),
                Point(end['line'], end['character']))
            resolved.append((file, definition))
        return resolved
    except Exception as e:
        # Resolution stays best effort, but the failure is no longer invisible.
        import logging
        logging.getLogger('code_graph').debug("Failed to resolve definition in %s: %s", file_path, e)
        return []
55+
56+
@abstractmethod
def add_dependencies(self, path: Path, files: dict[Path, File]):
    """
    Add dependencies to the files.

    Concrete analyzers provide the actual behaviour; this declaration only
    fixes the signature.

    Args:
        path (Path): The path to the folder.
        files (dict[Path, File]): The files.
    """
    ...
2567

2668
@abstractmethod
2769
def get_entity_label(self, node: Node) -> str:
@@ -85,7 +127,7 @@ def add_symbols(self, entity: Entity) -> None:
85127
pass
86128

87129
@abstractmethod
88-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, key: str, symbol: Node) -> Entity:
130+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
89131
"""
90132
Resolve a symbol to an entity.
91133

api/analyzers/c/analyzer.py

+74-74
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# import io
21
# import os
3-
# from ..utils import *
42
# from pathlib import Path
3+
4+
# from multilspy import SyncLanguageServer
55
# from ...entities import *
66
# from ...graph import Graph
77
# from typing import Optional
@@ -17,6 +17,30 @@
1717
# def __init__(self) -> None:
1818
# super().__init__(Language(tsc.language()))
1919

20+
# def get_entity_label(self, node: Node) -> str:
21+
# if node.type == 'struct_specifier':
22+
# return "Struct"
23+
# elif node.type == 'function_definition':
24+
# return "Function"
25+
# raise ValueError(f"Unknown entity type: {node.type}")
26+
27+
# def get_entity_name(self, node: Node) -> str:
28+
# if node.type in ['struct_specifier', 'function_definition']:
29+
# return node.child_by_field_name('name').text.decode('utf-8')
30+
# raise ValueError(f"Unknown entity type: {node.type}")
31+
32+
# def get_entity_docstring(self, node: Node) -> Optional[str]:
33+
# if node.type in ['struct_specifier', 'function_definition']:
34+
# body = node.child_by_field_name('body')
35+
# if body.child_count > 0 and body.children[0].type == 'expression_statement':
36+
# docstring_node = body.children[0].child(0)
37+
# return docstring_node.text.decode('utf-8')
38+
# return None
39+
# raise ValueError(f"Unknown entity type: {node.type}")
40+
41+
# def get_entity_types(self) -> list[str]:
42+
# return ['struct_specifier', 'function_definition']
43+
2044
# def process_pointer_declaration(self, node: Node) -> tuple[str, int]:
2145
# """
2246
# Processes a pointer declaration node to determine the argument name and pointer count.
@@ -313,78 +337,54 @@
313337
# # Connect parent to entity
314338
# graph.connect_entities('DEFINES', parent.id, entity.id)
315339

316-
# def first_pass(self, path: Path, graph:Graph) -> None:
317-
# """
318-
# Perform the first pass processing of a C source file or header file.
319-
320-
# Args:
321-
# path (Path): The path to the C source file or header file.
322-
# f (io.TextIOWrapper): The file object representing the opened C source file or header file.
323-
# graph (Graph): The Graph object where entities will be added.
324-
325-
# Returns:
326-
# None
327-
328-
# Raises:
329-
# None
330-
331-
# This function processes the specified C source file or header file to extract and add function definitions
332-
# and struct definitions to the provided graph object.
333-
334-
# - If the file path does not end with '.c' or '.h', it logs a debug message and skips processing.
335-
# - It creates a File entity representing the file and adds it to the graph.
336-
# - It parses the file content using a parser instance (`self.parser`).
337-
# - Function definitions and struct definitions are extracted using Tree-sitter queries.
338-
# - Each function definition is processed using `self.process_function_definition`.
339-
# - Each struct definition is processed using `self.process_struct_specifier`.
340-
# """
341-
342-
# if path.suffix != '.c' and path.suffix != '.h':
343-
# logger.debug(f"Skipping none C file {path}")
344-
# return
345-
346-
# logger.info(f"Processing {path}")
347-
348-
# # Create file entity
349-
# file = File(os.path.dirname(path), path.name, path.suffix)
350-
# graph.add_file(file)
351-
352-
# # Parse file
353-
# source_code = path.read_bytes()
354-
# tree = self.parser.parse(source_code)
355-
# try:
356-
# source_code = source_code.decode('utf-8')
357-
# except Exception as e:
358-
# logger.error(f"Failed decoding source code: {e}")
359-
# source_code = ''
360-
361-
# # Process function definitions
362-
# query = self.language.query("(function_definition) @function")
363-
# captures = query.captures(tree.root_node)
364-
# # captures: {'function':
365-
# # [<Node type=function_definition, start_point=(0, 0), end_point=(7, 1)>,
366-
# # <Node type=function_definition, start_point=(15, 0), end_point=(18, 1)>
367-
# # ]
368-
# # }
369-
370-
# if 'function' in captures:
371-
# functions = captures['function']
372-
# for node in functions:
373-
# self.process_function_definition(file, node, path, graph, source_code)
374-
375-
# # Process struct definitions
376-
# query = self.language.query("(struct_specifier) @struct")
377-
# captures = query.captures(tree.root_node)
378-
379-
# if 'struct' in captures:
380-
# structs = captures['struct']
381-
# # captures: {'struct':
382-
# # [
383-
# # <Node type=struct_specifier, start_point=(9, 0), end_point=(13, 1)>
384-
# # ]
385-
# # }
386-
# for node in structs:
387-
# self.process_struct_specifier(file, node, path, graph)
340+
# def add_symbols(self, entity: Entity) -> None:
341+
# if entity.node.type == 'struct_specifier':
342+
# superclasses = entity.node.child_by_field_name("superclasses")
343+
# if superclasses:
344+
# base_classes_query = self.language.query("(argument_list (_) @base_class)")
345+
# base_classes_captures = base_classes_query.captures(superclasses)
346+
# if 'base_class' in base_classes_captures:
347+
# for base_class in base_classes_captures['base_class']:
348+
# entity.add_symbol("base_class", base_class)
349+
# elif entity.node.type == 'function_definition':
350+
# query = self.language.query("(call) @reference.call")
351+
# captures = query.captures(entity.node)
352+
# if 'reference.call' in captures:
353+
# for caller in captures['reference.call']:
354+
# entity.add_symbol("call", caller)
355+
# query = self.language.query("(typed_parameter type: (_) @parameter)")
356+
# captures = query.captures(entity.node)
357+
# if 'parameter' in captures:
358+
# for parameter in captures['parameter']:
359+
# entity.add_symbol("parameters", parameter)
360+
# return_type = entity.node.child_by_field_name('return_type')
361+
# if return_type:
362+
# entity.add_symbol("return_type", return_type)
363+
364+
# def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, node: Node) -> list[Entity]:
365+
# res = []
366+
# for file, resolved_node in self.resolve(files, lsp, path, node):
367+
# type_dec = self.find_parent(resolved_node, ['struct_specifier'])
368+
# res.append(file.entities[type_dec])
369+
# return res
370+
371+
# def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, node: Node) -> list[Entity]:
372+
# res = []
373+
# for file, resolved_node in self.resolve(files, lsp, path, node):
374+
# method_dec = self.find_parent(resolved_node, ['function_definition'])
375+
# if not method_dec:
376+
# continue
377+
# if method_dec in file.entities:
378+
# res.append(file.entities[method_dec])
379+
# return res
380+
381+
# def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, key: str, symbol: Node) -> Entity:
382+
# if key in ["parameters", "return_type"]:
383+
# return self.resolve_type(files, lsp, path, symbol)
384+
# elif key in ["call"]:
385+
# return self.resolve_method(files, lsp, path, symbol)
386+
# else:
387+
# raise ValueError(f"Unknown key {key}")
388388

389389
# def second_pass(self, path: Path, graph: Graph) -> None:
390390
# """

api/analyzers/java/analyzer.py

+39-8
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import os
12
from pathlib import Path
3+
import subprocess
24
from ...entities import *
35
from typing import Optional
46
from ..analyzer import AbstractAnalyzer
@@ -8,13 +10,32 @@
810
import tree_sitter_java as tsjava
911
from tree_sitter import Language, Node
1012

13+
from xml.etree import ElementTree
14+
1115
import logging
1216
logger = logging.getLogger('code_graph')
1317

1418
class JavaAnalyzer(AbstractAnalyzer):
1519
def __init__(self) -> None:
1620
super().__init__(Language(tsjava.language()))
1721

22+
def add_dependencies(self, path: Path, files: dict[Path, File]):
    """
    Unpack the sources of the project's Maven dependencies under `<path>/temp_deps`.

    Every `<dependency>` found in `<path>/pom.xml` is looked up in the local
    Maven repository (`~/.m2/repository`) as a `-sources.jar`. Each jar that
    exists is copied into `<path>/temp_deps/<artifactId>-<version>` and
    extracted there. Dependencies without a full set of coordinates (for
    example a version inherited from a parent POM) or without a sources jar
    are skipped instead of aborting the whole run.

    Args:
        path (Path): The path to the project folder containing `pom.xml`.
        files (dict[Path, File]): The files (not used by this analyzer).
    """
    import shutil
    import zipfile

    namespace = '{http://maven.apache.org/POM/4.0.0}'
    deps_root = Path(f"{path}/temp_deps")
    # Start from a clean directory so stale sources from a previous run never linger.
    shutil.rmtree(deps_root, ignore_errors=True)

    pom = ElementTree.parse(str(path) + '/pom.xml')
    for dependency in pom.findall(f'.//{namespace}dependency'):
        group_id = (dependency.findtext(f'{namespace}groupId') or '').strip()
        artifact_id = (dependency.findtext(f'{namespace}artifactId') or '').strip()
        version = (dependency.findtext(f'{namespace}version') or '').strip()
        if not (group_id and artifact_id and version):
            # Without all three coordinates the jar cannot be located.
            continue

        unpack_dir = deps_root / f"{artifact_id}-{version}"
        unpack_dir.mkdir(parents=True, exist_ok=True)

        sources_jar = Path.home() / '.m2' / 'repository' / group_id.replace('.', '/') / artifact_id / version / f"{artifact_id}-{version}-sources.jar"
        if not sources_jar.is_file():
            continue

        # Keep a copy of the jar next to its extracted content, as before.
        shutil.copyfile(sources_jar, unpack_dir / f"{artifact_id}-{version}.jar")
        try:
            with zipfile.ZipFile(sources_jar) as archive:
                archive.extractall(unpack_dir)
        except zipfile.BadZipFile:
            logger.warning(f"Skipping corrupt sources jar {sources_jar}")
38+
1839
def get_entity_label(self, node: Node) -> str:
1940
if node.type == 'class_declaration':
2041
return "Class"
@@ -75,28 +96,38 @@ def add_symbols(self, entity: Entity) -> None:
7596
entity.add_symbol("parameters", parameter)
7697
entity.add_symbol("return_type", entity.node.child_by_field_name('type'))
7798

78-
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, node: Node) -> list[Entity]:
99+
def is_dependency(self, file_path: str) -> bool:
    """
    Tell whether a file belongs to a dependency.

    A path counts as a dependency when the text ".jar" occurs anywhere in it.

    Args:
        file_path (str): The file path.

    Returns:
        bool: True if the file is a dependency, False otherwise.
    """
    jar_marker = ".jar"
    return jar_marker in file_path
101+
102+
def resolve_path(self, file_path: str, path: Path) -> str:
    """
    Resolve the path of the file.

    Paths that mention a jar are mapped onto the extracted sources under
    `<path>/temp_deps`: the first path component names the jar directory, the
    dotted components after it become package directories, and the last
    component is the file, with a `.class` suffix turned into `.java`.
    Any other path is returned unchanged.

    Args:
        file_path (str): The file path.
        path (Path): The path to the folder.

    Returns:
        str: The resolved path.
    """
    if ".jar" not in file_path:
        return file_path

    args = file_path.split("/")
    jar_dir = args[1].replace(".jar", "")
    # Package components are only split on dots; rewriting ".class"/".jar"
    # inside them would corrupt names such as "java.lang.classfile".
    targs = "/".join(arg.replace(".", "/") for arg in args[2:-1])
    file_name = args[-1]
    if file_name.endswith(".class"):
        file_name = file_name[:-len(".class")] + ".java"
    return f"{path}/temp_deps/{jar_dir}/{targs}/{file_name}"
108+
109+
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """
    Resolve a type reference to the entities that declare it.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `node`.
        path (Path): The path to the folder.
        node (Node): The node referencing the type.

    Returns:
        list[Entity]: The entities of the enclosing class, interface or enum
        declarations that are registered in their file.
    """
    type_kinds = ['class_declaration', 'interface_declaration', 'enum_declaration']
    matches = []
    for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
        declaration = self.find_parent(resolved_node, type_kinds)
        if declaration not in file.entities:
            continue
        matches.append(file.entities[declaration])
    return matches
85116

86-
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, node: Node) -> list[Entity]:
117+
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """
    Resolve a call to the entities of the methods or constructors it targets.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `node`.
        path (Path): The path to the folder.
        node (Node): The call node; its `name` field is what gets resolved.

    Returns:
        list[Entity]: The registered entities of the enclosing method or
        constructor declarations. Definitions whose nearest enclosing
        declaration is a type are left out.
    """
    type_kinds = ['class_declaration', 'interface_declaration', 'enum_declaration']
    callable_kinds = ['method_declaration', 'constructor_declaration']
    name_node = node.child_by_field_name('name')

    found = []
    for file, resolved_node in self.resolve(files, lsp, file_path, path, name_node):
        declaration = self.find_parent(resolved_node, callable_kinds + type_kinds)
        lands_on_type = declaration and declaration.type in type_kinds
        if not lands_on_type and declaration in file.entities:
            found.append(file.entities[declaration])
    return found
95126

96-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, key: str, symbol: Node) -> Entity:
127+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
    """
    Resolve a symbol to an entity.

    Type-like keys are handed to `resolve_type` and "call" to
    `resolve_method`; the delegate's result is returned as is.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `symbol`.
        path (Path): The path to the folder.
        key (str): The kind of symbol being resolved.
        symbol (Node): The symbol node.

    Raises:
        ValueError: If `key` is not a known symbol kind.
    """
    type_keys = ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]
    if key in type_keys:
        return self.resolve_type(files, lsp, file_path, path, symbol)
    if key == "call":
        return self.resolve_method(files, lsp, file_path, path, symbol)
    raise ValueError(f"Unknown key {key}")

api/analyzers/python/analyzer.py

+32-8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import subprocess
12
from multilspy import SyncLanguageServer
23
from pathlib import Path
34
from ...entities import *
@@ -13,6 +14,16 @@
1314
class PythonAnalyzer(AbstractAnalyzer):
1415
def __init__(self) -> None:
1516
super().__init__(Language(tspython.language()))
17+
18+
def add_dependencies(self, path: Path, files: dict[Path, File]):
    """
    Create a virtual environment in the project and install its dependencies.

    Nothing happens when `<path>/venv` already exists. Otherwise a venv is
    created there, then `requirements.txt` is installed with pip and, when a
    `pyproject.toml` is present, poetry is installed and run.

    Args:
        path (Path): The path to the project folder.
        files (dict[Path, File]): The files (not used by this analyzer).
    """
    # Absolute paths: the commands below run with the project as their
    # working directory, and a relative executable would be looked up
    # relative to that directory.
    project = Path(path).resolve()
    venv = project / "venv"
    if venv.is_dir():
        return

    subprocess.run(["python3", "-m", "venv", str(venv)])

    requirements = project / "requirements.txt"
    if requirements.is_file():
        # Run inside the project so the requirements file of *this* project is
        # used and relative entries in it resolve against the project.
        subprocess.run([str(venv / "bin" / "pip"), "install", "-r", str(requirements)], cwd=project)

    if (project / "pyproject.toml").is_file():
        subprocess.run([str(venv / "bin" / "pip"), "install", "poetry"], cwd=project)
        # poetry discovers pyproject.toml from the working directory.
        subprocess.run([str(venv / "bin" / "poetry"), "install"], cwd=project)
1627

1728
def get_entity_label(self, node: Node) -> str:
1829
if node.type == 'class_definition':
@@ -62,27 +73,40 @@ def add_symbols(self, entity: Entity) -> None:
6273
if return_type:
6374
entity.add_symbol("return_type", return_type)
6475

65-
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, node: Node) -> list[Entity]:
76+
def is_dependency(self, file_path: str) -> bool:
    """
    Tell whether a file belongs to a dependency.

    A path counts as a dependency when the text "venv" occurs anywhere in it.

    Args:
        file_path (str): The file path.

    Returns:
        bool: True if the file is a dependency, False otherwise.
    """
    venv_marker = "venv"
    return venv_marker in file_path
78+
79+
def resolve_path(self, file_path: str, path: Path) -> str:
    """
    Resolve the path of the file.

    Paths need no mapping for this analyzer, so the input is handed back
    untouched.

    Args:
        file_path (str): The file path.
        path (Path): The path to the folder (unused).

    Returns:
        str: `file_path`, unchanged.
    """
    resolved = file_path
    return resolved
81+
82+
def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path, node: Node) -> list[Entity]:
    """
    Resolve a type reference to the entities of the classes that define it.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `node`.
        path: The path to the folder.
        node (Node): The node referencing the type; for an `attribute` node
            its `attribute` field is resolved instead.

    Returns:
        list[Entity]: The entities of the enclosing class definitions that are
        registered in their file.
    """
    target = node.child_by_field_name('attribute') if node.type == 'attribute' else node
    candidates = (
        (file, self.find_parent(resolved_node, ['class_definition']))
        for file, resolved_node in self.resolve(files, lsp, file_path, path, target)
    )
    return [file.entities[class_node] for file, class_node in candidates if class_node in file.entities]
7191

72-
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, node: Node) -> list[Entity]:
92+
def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """
    Resolve a call to the entities of the functions or classes it targets.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `node`.
        path (Path): The path to the folder.
        node (Node): The node to resolve. A `call` node is narrowed to its
            `function` field, and an `attribute` node to its `attribute` field.

    Returns:
        list[Entity]: The registered entities of the enclosing function or
        class definitions.
    """
    target = node
    if target.type == 'call':
        target = target.child_by_field_name('function')
    if target.type == 'attribute':
        target = target.child_by_field_name('attribute')

    found = []
    for file, resolved_node in self.resolve(files, lsp, file_path, path, target):
        definition = self.find_parent(resolved_node, ['function_definition', 'class_definition'])
        if definition and definition in file.entities:
            found.append(file.entities[definition])
    return found
81105

82-
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, path: Path, key: str, symbol: Node) -> Entity:
106+
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
    """
    Resolve a symbol to an entity.

    Type-like keys are handed to `resolve_type` and "call" to
    `resolve_method`; the delegate's result is returned as is.

    Args:
        files (dict[Path, File]): The known files, keyed by path.
        lsp (SyncLanguageServer): The language server to query.
        file_path (Path): The file that contains `symbol`.
        path (Path): The path to the folder.
        key (str): The kind of symbol being resolved.
        symbol (Node): The symbol node.

    Raises:
        ValueError: If `key` is not a known symbol kind.
    """
    type_keys = ["base_class", "parameters", "return_type"]
    if key in type_keys:
        return self.resolve_type(files, lsp, file_path, path, symbol)
    if key == "call":
        return self.resolve_method(files, lsp, file_path, path, symbol)
    raise ValueError(f"Unknown key {key}")

0 commit comments

Comments
 (0)