Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(API): Refactor datasets API #2439

Merged
merged 15 commits into from
Sep 18, 2024
Merged
71 changes: 54 additions & 17 deletions api/apps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,40 +18,68 @@
import sys
from importlib.util import module_from_spec, spec_from_file_location
from pathlib import Path
from flask import Blueprint, Flask
from werkzeug.wrappers.request import Request
from typing import Union

from apiflask import APIFlask, APIBlueprint, HTTPTokenAuth
from flask_cors import CORS
from flask_login import LoginManager
from flask_session import Session
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
from werkzeug.wrappers.request import Request

from api.db import StatusEnum
from api.db.db_models import close_connection
from api.db.db_models import close_connection, APIToken
from api.db.services import UserService
from api.utils import CustomJSONEncoder, commands

from flask_session import Session
from flask_login import LoginManager
from api.settings import API_VERSION, access_logger, RAG_FLOW_SERVICE_NAME
from api.settings import SECRET_KEY, stat_logger
from api.settings import API_VERSION, access_logger
from api.utils import CustomJSONEncoder, commands
from api.utils.api_utils import server_error_response
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer

__all__ = ['app']


# Mirror every access-log handler onto Flask's own 'flask.app' logger.
logger = logging.getLogger('flask.app')
for h in access_logger.handlers:
    logger.addHandler(h)

# Make `request.json` lenient: parse the body as JSON whatever the
# Content-Type is (force=True) and yield None instead of raising when the
# body is not valid JSON (silent=True).
Request.json = property(lambda self: self.get_json(force=True, silent=True))

# Integrate APIFlask: Flask class -> APIFlask class.
# A plain Flask instance used to be created (and CORS-wrapped) just before
# this line; it was immediately overwritten by the assignment below, so only
# the APIFlask application is built. The API docs path is /<API_VERSION>/docs.
app = APIFlask(__name__, title=RAG_FLOW_SERVICE_NAME, version=API_VERSION, docs_path=f'/{API_VERSION}/docs')
# Integrate APIFlask: Use apiflask.HTTPTokenAuth for the HTTP Bearer or API Keys authentication.
http_token_auth = HTTPTokenAuth()


# Current logged-in user class
class AuthUser:
    """Identity object handed out after a token has been verified.

    The constructor stores its two arguments unchanged: ``tenant_id`` is
    exposed as ``id`` and ``token`` as ``token``.
    """

    def __init__(self, tenant_id, token):
        """Store the tenant id (as ``id``) and the token string."""
        self.id = tenant_id
        self.token = token

    def __repr__(self):
        # The token is a credential, so it is deliberately left out of the
        # representation to keep it from ending up in logs.
        return f"{type(self).__name__}(id={self.id!r})"

    def get_token(self):
        """Return the token that was passed to the constructor."""
        return self.token


# Verify if the token is valid
@http_token_auth.verify_token
def verify_token(token: str) -> Union[AuthUser, None]:
    """Resolve an API token to an ``AuthUser``.

    Looks the token up through ``APIToken.query``. The first matching record
    supplies the tenant id and token for the returned ``AuthUser``.

    Returns:
        The ``AuthUser`` for the first match, or ``None`` when nothing
        matches or when the lookup raises.
    """
    try:
        matches = APIToken.query(token=token)
        if not matches:
            return None
        record = matches[0]
        return AuthUser(record.tenant_id, record.token)
    except Exception as err:
        # The error is passed to server_error_response but its return value
        # is discarded; the caller only ever sees None for a failed lookup.
        server_error_response(err)
    return None


# Enable CORS with credentials; max_age=2592000 seconds is 30 days
# (the value Flask-CORS uses for preflight caching).
CORS(app, supports_credentials=True, max_age=2592000)
# Do not distinguish between routes with and without a trailing slash.
app.url_map.strict_slashes = False
# NOTE(review): Flask 2.3 removed the `json_encoder` attribute in favour of
# JSON providers (`app.json`) - confirm the pinned Flask version honours this.
app.json_encoder = CustomJSONEncoder
# Register server_error_response as the handler for Exception.
app.errorhandler(Exception)(server_error_response)


## Convenience for dev and debug: uncomment to set Flask-Login's LOGIN_DISABLED flag.
# app.config["LOGIN_DISABLED"] = True
# Session settings: non-permanent sessions, "filesystem" session type.
app.config["SESSION_PERMANENT"] = False
app.config["SESSION_TYPE"] = "filesystem"
# Request body size limit in bytes: taken from the MAX_CONTENT_LENGTH
# environment variable when set, otherwise 128 * 1024 * 1024 (128 MiB).
app.config['MAX_CONTENT_LENGTH'] = int(os.environ.get("MAX_CONTENT_LENGTH", 128 * 1024 * 1024))
Expand All @@ -66,7 +94,9 @@
def search_pages_path(pages_dir):
    """Collect the page modules found under *pages_dir*.

    Args:
        pages_dir: ``pathlib.Path`` of the directory to scan.

    Returns:
        A list of paths in three groups, in this order: files matching
        ``*_app.py`` directly in *pages_dir*, then ``*.py`` files inside
        sub-directories whose name ends in ``sdk``, then ``*.py`` files
        inside sub-directories whose name ends in ``apis``. Files whose
        name starts with ``.`` are left out of every group.
    """
    # One pattern per group; adding a new group is a one-line change here.
    patterns = ('*_app.py', '*sdk/*.py', '*apis/*.py')
    found = []
    for pattern in patterns:
        found.extend(
            path for path in pages_dir.glob(pattern)
            if not path.name.startswith('.')
        )
    return found


Expand All @@ -79,11 +109,17 @@ def register_page(page_path):
spec = spec_from_file_location(module_name, page_path)
page = module_from_spec(spec)
page.app = app
page.manager = Blueprint(page_name, module_name)
# Integrate APIFlask: Blueprint class -> APIBlueprint class
page.manager = APIBlueprint(page_name, module_name)
sys.modules[module_name] = page
spec.loader.exec_module(page)
page_name = getattr(page, 'page_name', page_name)
url_prefix = f'/api/{API_VERSION}/{page_name}' if "/sdk/" in path else f'/{API_VERSION}/{page_name}'
if "/sdk/" in path or "/apis/" in path:
url_prefix = f'/api/{API_VERSION}/{page_name}'
# elif "/apis/" in path:
# url_prefix = f'/{API_VERSION}/api/{page_name}'
else:
url_prefix = f'/{API_VERSION}/{page_name}'

app.register_blueprint(page.manager, url_prefix=url_prefix)
return url_prefix
Expand All @@ -93,6 +129,7 @@ def register_page(page_path):
Path(__file__).parent,
Path(__file__).parent.parent / 'api' / 'apps',
Path(__file__).parent.parent / 'api' / 'apps' / 'sdk',
Path(__file__).parent.parent / 'api' / 'apps' / 'apis',
]

client_urls_prefix = [
Expand Down Expand Up @@ -123,4 +160,4 @@ def load_user(web_request):

@app.teardown_request
def _db_close(exc):
    """Close the database connection at the end of every request.

    Args:
        exc: Value Flask passes to teardown callbacks (the unhandled
            exception, or ``None``); it is not used here.
    """
    # A single call per teardown; the connection does not need closing twice.
    close_connection()
Empty file added api/apps/apis/__init__.py
Empty file.
96 changes: 96 additions & 0 deletions api/apps/apis/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from api.apps import http_token_auth
from api.apps.services import dataset_service
from api.utils.api_utils import server_error_response, http_basic_auth_required


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before its
# body is parsed and validated.
@manager.post('')
@manager.auth_required(http_token_auth)
@manager.input(dataset_service.CreateDatasetReq, location='json')
def create_dataset(json_data):
    """Creates a new Dataset(Knowledgebase)."""
    # json_data: JSON body parsed against CreateDatasetReq by @manager.input.
    # The docstring is left as a one-line summary because APIFlask can publish
    # it in the generated API documentation.
    try:
        # The dataset is created for the tenant behind the presented token.
        tenant_id = http_token_auth.current_user.id
        return dataset_service.create_dataset(tenant_id, json_data)
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before its
# body is parsed and validated.
@manager.put('')
@manager.auth_required(http_token_auth)
@manager.input(dataset_service.UpdateDatasetReq, location='json')
def update_dataset(json_data):
    """Updates a Dataset(Knowledgebase)."""
    # json_data: JSON body parsed against UpdateDatasetReq by @manager.input.
    try:
        # The update is performed on behalf of the tenant behind the token.
        tenant_id = http_token_auth.current_user.id
        return dataset_service.update_dataset(tenant_id, json_data)
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)


@manager.get('/<string:kb_id>')
@manager.auth_required(http_token_auth)
def get_dataset_by_id(kb_id):
    """Query Dataset(Knowledgebase) by Dataset(Knowledgebase) ID."""
    # kb_id comes from the URL path; the tenant id comes from the user that
    # the token authentication attached to this request.
    try:
        requester = http_token_auth.current_user
        result = dataset_service.get_dataset_by_id(requester.id, kb_id)
    except Exception as err:
        return server_error_response(err)
    else:
        return result


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before its
# query string is parsed and validated.
@manager.get('/search')
@manager.auth_required(http_token_auth)
@manager.input(dataset_service.SearchDatasetReq, location='query')
def get_dataset_by_name(query_data):
    """Query Dataset(Knowledgebase) by Dataset(Knowledgebase) Name."""
    # query_data: query-string arguments parsed against SearchDatasetReq;
    # only its "name" entry is used here.
    try:
        tenant_id = http_token_auth.current_user.id
        return dataset_service.get_dataset_by_name(tenant_id, query_data["name"])
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)


# NOTE(review): this is the only endpoint in this module that stacks
# http_basic_auth_required on top of the token check, and input parsing runs
# before either of them. The helper lives in api.utils.api_utils and is not
# visible from here - confirm both checks (and this order) are intended.
@manager.get('')
@manager.input(dataset_service.QueryDatasetReq, location='query')
@http_basic_auth_required
@manager.auth_required(http_token_auth)
def get_all_datasets(query_data):
    """Query all Datasets(Knowledgebase)"""
    # query_data: query-string arguments parsed against QueryDatasetReq; the
    # four entries below are forwarded positionally, in this order.
    try:
        tenant_id = http_token_auth.current_user.id
        listing_args = [query_data[key] for key in ('page', 'page_size', 'orderby', 'desc')]
        return dataset_service.get_all_datasets(tenant_id, *listing_args)
    except Exception as err:
        return server_error_response(err)


@manager.delete('/<string:kb_id>')
@manager.auth_required(http_token_auth)
def delete_dataset(kb_id):
    """Deletes a Dataset(Knowledgebase)."""
    # kb_id comes from the URL path; the tenant id comes from the user that
    # the token authentication attached to this request.
    try:
        requester = http_token_auth.current_user
        result = dataset_service.delete_dataset(requester.id, kb_id)
    except Exception as err:
        return server_error_response(err)
    else:
        return result
64 changes: 64 additions & 0 deletions api/apps/apis/documents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from api.apps import http_token_auth
from api.apps.services import document_service
from api.utils.api_utils import server_error_response


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before its
# body is parsed and validated. `post` is the same shortcut the other
# endpoints use for `route(..., methods=['POST'])`.
@manager.post('/change_parser')
@manager.auth_required(http_token_auth)
@manager.input(document_service.ChangeDocumentParserReq, location='json')
def change_document_parser(json_data):
    """Change document file parser."""
    # json_data: JSON body parsed against ChangeDocumentParserReq.
    # NOTE(review): the tenant id of the authenticated user is not forwarded
    # to the service here - confirm ownership is enforced elsewhere.
    try:
        return document_service.change_document_parser(json_data)
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before its
# body is parsed and validated. `post` is the same shortcut the other
# endpoints use for `route(..., methods=['POST'])`.
@manager.post('/run')
@manager.auth_required(http_token_auth)
@manager.input(document_service.RunParsingReq, location='json')
def run_parsing(json_data):
    """Run parsing documents file."""
    # json_data: JSON body parsed against RunParsingReq.
    # NOTE(review): the tenant id of the authenticated user is not forwarded
    # to the service here - confirm ownership is enforced elsewhere.
    try:
        return document_service.run_parsing(json_data)
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before the
# uploaded form and files are parsed and validated.
@manager.post('/upload')
@manager.auth_required(http_token_auth)
@manager.input(document_service.UploadDocumentsReq, location='form_and_files')
def upload_documents_2_dataset(form_and_files_data):
    """Upload documents file a Dataset(Knowledgebase)."""
    # form_and_files_data: form fields and uploaded files parsed against
    # UploadDocumentsReq by @manager.input.
    try:
        # The upload is performed on behalf of the tenant behind the token.
        tenant_id = http_token_auth.current_user.id
        return document_service.upload_documents_2_dataset(form_and_files_data, tenant_id)
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)


# Decorators run outermost-first at request time: the token check is placed
# above input parsing so an unauthenticated request is rejected before its
# query string is parsed and validated.
@manager.get('')
@manager.auth_required(http_token_auth)
@manager.input(document_service.QueryDocumentsReq, location='query')
def get_all_documents(query_data):
    """Query documents file in Dataset(Knowledgebase)."""
    # query_data: query-string arguments parsed against QueryDocumentsReq and
    # forwarded to the service unchanged.
    try:
        tenant_id = http_token_auth.current_user.id
        return document_service.get_all_documents(query_data, tenant_id)
    except Exception as e:
        # Any failure is reported through server_error_response.
        return server_error_response(e)
Empty file added api/apps/services/__init__.py
Empty file.
Loading