Skip to content

Commit

Permalink
port over schema extraction poc
Browse files Browse the repository at this point in the history
  • Loading branch information
mistermoe committed Nov 6, 2024
1 parent c409932 commit e36dad9
Show file tree
Hide file tree
Showing 21 changed files with 808 additions and 215 deletions.
Empty file added backend/protos/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions examples/python/echo/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions python-runtime/compile/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ func Build(ctx context.Context, projectRootDir, stubsRoot string, config modulec

buildDir := buildDir(config.Dir)

// TODO: call the Python schema extractor, capture the script's output, unmarshal it into the schema proto, convert that into the Go type, and return it.
// Do the same for build errors.

if err := internal.ScaffoldZip(buildTemplateFiles(), buildDir, mctx, scaffolder.Functions(scaffoldFuncs)); err != nil {
return moduleSch, nil, fmt.Errorf("failed to scaffold build template: %w", err)
}
Expand Down
1 change: 1 addition & 0 deletions python-runtime/ftl/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ authors = [
requires-python = ">=3.12"
dependencies = [
"protobuf>=5.28.3",
"google>=3.0.0"
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion python-runtime/ftl/src/ftl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .decorators import verb
from .verb import verb

__all__ = ["verb"]
Empty file.
105 changes: 105 additions & 0 deletions python-runtime/ftl/src/ftl/cli/schema_extractor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import argparse
import ast
import concurrent.futures
import os
import sys
from contextlib import contextmanager

from ftl.extract import (
GlobalExtractionContext,
TransitiveExtractor,
)
from ftl.verb import (
VerbExtractor,
)

# Analyzer classes grouped into batches. analyze_directory processes the
# batches one after another in list order; within a batch, analyze_file submits
# every analyzer class to a thread pool, so analyzers sharing a sublist may run
# concurrently.
analyzers = [
[VerbExtractor],
[TransitiveExtractor],
]


@contextmanager
def set_analysis_mode(path):
    """Temporarily append ``path`` to ``sys.path`` while the body runs.

    On exit, whether the body returns or raises, ``sys.path`` is bound to the
    same list object it referred to on entry and that list's entries are reset
    to the snapshot taken on entry. Restoring the original object, instead of
    rebinding ``sys.path`` to a copy, keeps references held elsewhere (for
    example via ``from sys import path``) pointing at the live search path.

    Args:
        path: Directory to append to the import search path while the context
            is active.
    """
    original_path_list = sys.path
    original_entries = sys.path.copy()
    sys.path.append(path)
    try:
        yield
    finally:
        # Rebind first in case the body replaced sys.path with another list,
        # then reset the contents in place.
        sys.path = original_path_list
        sys.path[:] = original_entries


def analyze_directory(module_dir):
    """Run every analyzer batch over all ``.py`` files found under ``module_dir``.

    The directory tree is walked once to collect the file list. For each batch
    in ``analyzers`` a fresh process pool is created and one ``analyze_file``
    task is submitted per file; the next batch starts only after the pool for
    the previous one has been shut down. A failure in one file is printed and
    does not stop the remaining files. Finally, every declaration returned by
    ``global_ctx.deserialize()`` is printed.

    NOTE(review): ``global_ctx`` is handed to worker processes, so it has to be
    picklable; whether updates made in the workers are visible here depends on
    how GlobalExtractionContext is implemented - confirm.

    Args:
        module_dir: Root directory to search recursively for Python files.
    """
    global_ctx = GlobalExtractionContext()

    file_paths = [
        os.path.join(dirpath, filename)
        for dirpath, _, filenames in os.walk(module_dir)
        for filename in filenames
        if filename.endswith(".py")
    ]

    for analyzer_batch in analyzers:
        with concurrent.futures.ProcessPoolExecutor() as executor:
            # Remember which file each future belongs to for error reporting.
            future_to_file = {}
            for path in file_paths:
                pending = executor.submit(
                    analyze_file, global_ctx, path, analyzer_batch
                )
                future_to_file[pending] = path

            for future in concurrent.futures.as_completed(future_to_file):
                file_path = future_to_file[future]
                try:
                    # result() re-raises any exception from the worker process.
                    future.result()
                except Exception as exc:
                    print(f"failed to extract schema from {file_path}: {exc};")

    for _, decl in global_ctx.deserialize().items():
        print(f"Extracted Decl:\n{decl}")


def analyze_file(global_ctx: GlobalExtractionContext, file_path, analyzer_batch):
    """Parse one Python file and run a batch of analyzers over its AST.

    The module name is the file's base name without extension. Each analyzer
    class in ``analyzer_batch`` is submitted to a thread pool together with the
    same local context and AST. An exception raised by an analyzer is printed
    and not propagated.

    Args:
        global_ctx: Extraction context; used here only to create the local
            context passed to the analyzers.
        file_path: Path of the Python source file to analyze.
        analyzer_batch: Iterable of analyzer classes to run on the file.

    Raises:
        OSError: If the file cannot be opened or read.
        SyntaxError: If the source cannot be decoded or parsed.
    """
    module_name = os.path.splitext(os.path.basename(file_path))[0]
    # Read raw bytes so the parser applies the file's own BOM / coding cookie
    # (UTF-8 by default) instead of the locale encoding, and close the handle
    # deterministically rather than leaking it until garbage collection.
    with open(file_path, "rb") as source_file:
        file_ast = ast.parse(source_file.read(), filename=file_path)
    local_ctx = global_ctx.init_local_context()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                run_analyzer,
                analyzer_class,
                local_ctx,
                module_name,
                file_path,
                file_ast,
            )
            for analyzer_class in analyzer_batch
        ]

        for future in concurrent.futures.as_completed(futures):
            try:
                # result() re-raises whatever the analyzer thread raised.
                future.result()
            except Exception as exc:
                print(f"Analyzer generated an exception: {exc} in {file_path}")


def run_analyzer(analyzer_class, context, module_name, file_path, file_ast):
    """Instantiate ``analyzer_class`` and have it visit ``file_ast``.

    The analyzer is constructed with ``(context, module_name, file_path)``;
    whatever its ``visit`` call returns is discarded.
    """
    analyzer_class(context, module_name, file_path).visit(file_ast)


if __name__ == "__main__":
    # CLI entry point: the module directory is the only (positional) argument.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "module_dir", type=str, help="The Python module directory to analyze."
    )
    args = parser.parse_args()

    # Named module_dir rather than `dir` so the builtin dir() is not shadowed.
    module_dir = args.module_dir
    # Make the analyzed directory importable while the extraction runs.
    with set_analysis_mode(module_dir):
        analyze_directory(module_dir)
3 changes: 0 additions & 3 deletions python-runtime/ftl/src/ftl/decorators/__init__.py

This file was deleted.

30 changes: 0 additions & 30 deletions python-runtime/ftl/src/ftl/decorators/verb.py

This file was deleted.

22 changes: 22 additions & 0 deletions python-runtime/ftl/src/ftl/extract/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from .common import (
extract_basic_type,
extract_class_type,
extract_function_type,
extract_map,
extract_slice,
extract_type,
)
from .context import GlobalExtractionContext, LocalExtractionContext
from .transitive import TransitiveExtractor

# Names exported by `from ftl.extract import *`; each entry is one of the
# names imported from the sibling modules above.
__all__ = [
"extract_type",
"extract_slice",
"extract_map",
"extract_basic_type",
"extract_class_type",
"extract_function_type",
"LocalExtractionContext",
"GlobalExtractionContext",
"TransitiveExtractor",
]
99 changes: 99 additions & 0 deletions python-runtime/ftl/src/ftl/extract/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from typing import Any, Dict, List, Optional, Type

from ftl.protos.xyz.block.ftl.v1.schema import schema_pb2 as schemapb

from .context import LocalExtractionContext


def extract_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Translate a Python type hint into the corresponding schema type.

    Dispatch order:
      * a ``list`` instance -> ``extract_slice``
      * a ``dict`` instance -> ``extract_map``
      * ``typing.Any`` -> schema ``Any``
      * ``str`` / ``int`` / ``bool`` / ``float`` -> ``extract_basic_type``
      * any other class -> ``extract_class_type``

    Args:
        local_ctx: Extraction context forwarded to the specialised extractors.
        type_hint: The hint to translate.

    Returns:
        The schema type, or ``None`` when the hint is not supported.
    """
    if isinstance(type_hint, list):
        return extract_slice(local_ctx, type_hint)

    if isinstance(type_hint, dict):
        return extract_map(local_ctx, type_hint)

    if type_hint is Any:
        return schemapb.Type(any=schemapb.Any())

    # Parametric hints (e.g. List[int], Dict[str, int]) are not handled yet.
    # TODO: raise exception for unsupported types
    if not isinstance(type_hint, type):
        return None

    if any(type_hint is basic for basic in (str, int, bool, float)):
        return extract_basic_type(type_hint)

    if hasattr(type_hint, "__bases__"):
        return extract_class_type(local_ctx, type_hint)

    # NOTE(review): classes normally always expose __bases__, so this branch
    # looks unreachable - confirm whether plain functions were meant to be
    # routed here.
    if callable(type_hint):
        return extract_function_type(local_ctx, type_hint)

    return None


def extract_slice(
    local_ctx: LocalExtractionContext, type_hint: List[Any]
) -> Optional[schemapb.Type]:
    """Build a schema array type from a list-shaped hint such as ``[int]``.

    Only the first element of the list is inspected; it is resolved through
    ``extract_type`` and used as the array's element type.

    Returns:
        The array type, or ``None`` when the hint is not a non-empty list or
        its element type could not be extracted.
    """
    if not (isinstance(type_hint, list) and type_hint):
        return None

    element_type = extract_type(local_ctx, type_hint[0])
    if not element_type:
        return None

    return schemapb.Type(array=schemapb.Array(element=element_type))


def extract_map(
    local_ctx: LocalExtractionContext, type_hint: Dict[Any, Any]
) -> Optional[schemapb.Type]:
    """Build a schema map type from a dict-shaped hint such as ``{str: int}``.

    Only the first key/value pair of the dict is inspected; key and value are
    each resolved through ``extract_type``.

    Args:
        local_ctx: Extraction context forwarded to ``extract_type``.
        type_hint: Dict whose first item gives the key and value hints.

    Returns:
        The map type, or ``None`` when the hint is not a dict, is empty, or
        either the key or the value type could not be extracted.
    """
    # An empty dict carries no key/value hint; return None (as extract_slice
    # does for an empty list) instead of raising IndexError.
    if not isinstance(type_hint, dict) or not type_hint:
        return None

    key_hint, value_hint = next(iter(type_hint.items()))
    key_type = extract_type(local_ctx, key_hint)
    value_type = extract_type(local_ctx, value_hint)
    if key_type and value_type:
        return schemapb.Type(map=schemapb.Map(key=key_type, value=value_type))
    return None


def extract_basic_type(type_hint: Type[Any]) -> Optional[schemapb.Type]:
    """Map ``str``, ``int``, ``bool`` or ``float`` to its schema type.

    Returns:
        A newly built schema type for the four supported builtins, or ``None``
        for any other hint.
    """
    builders = {
        str: lambda: schemapb.Type(string=schemapb.String()),
        int: lambda: schemapb.Type(int=schemapb.Int()),
        bool: lambda: schemapb.Type(bool=schemapb.Bool()),
        float: lambda: schemapb.Type(float=schemapb.Float()),
    }
    build = builders.get(type_hint)
    if build is None:
        return None
    return build()


# Uncomment and implement parametric types if needed
# def extract_parametric_type(local_ctx: LocalExtractionContext, type_hint: Type[Any]) -> Optional[schemapb.Type]:
# if hasattr(type_hint, "__args__"):
# base_type = extract_type(local_ctx, type_hint.__origin__)
# param_types = [extract_type(local_ctx, arg) for arg in type_hint.__args__]
# if isinstance(base_type, schemapb.Ref):
# base_type.type_parameters.extend(param_types)
# return base_type
# return None


def extract_class_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Return a schema reference to a class and register it for extraction.

    The reference is built from the class's ``__name__`` and ``__module__`` and
    is passed to ``local_ctx.add_needs_extraction`` before being wrapped in a
    schema type.
    """
    class_name = type_hint.__name__
    class_module = type_hint.__module__
    class_ref = schemapb.Ref(name=class_name, module=class_module)
    local_ctx.add_needs_extraction(class_ref)
    return schemapb.Type(ref=class_ref)


def extract_function_type(
    local_ctx: LocalExtractionContext, type_hint: Type[Any]
) -> Optional[schemapb.Type]:
    """Return a schema reference to a callable and register it for extraction.

    The reference is built from the callable's ``__name__`` and ``__module__``
    and is passed to ``local_ctx.add_needs_extraction`` before being wrapped in
    a schema type.
    """
    function_name = type_hint.__name__
    function_module = type_hint.__module__
    function_ref = schemapb.Ref(name=function_name, module=function_module)
    local_ctx.add_needs_extraction(function_ref)
    return schemapb.Type(ref=function_ref)
Loading

0 comments on commit e36dad9

Please sign in to comment.