Skip to content

Commit

Permalink
Merge pull request #241 from Sprate/feature
Browse files Browse the repository at this point in the history
add  separate compilation for feature engineering
  • Loading branch information
honshj authored Dec 4, 2021
2 parents b110ae5 + 21bfc16 commit 524c3dd
Show file tree
Hide file tree
Showing 26 changed files with 321 additions and 116 deletions.
5 changes: 1 addition & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ if (USE_ABY3_TRUNC1)
endif(USE_ABY3_TRUNC1)

add_subdirectory(core/common)
add_subdirectory(feature/he)
add_subdirectory(core/he)
if (NOT WITH_GPU)
add_subdirectory(core/privc)
endif()
Expand Down Expand Up @@ -171,9 +171,6 @@ install(DIRECTORY "${THIRD_PARTY_PATH}/install/openssl/lib/"
install(TARGETS paddle_enc mpc_data_utils
LIBRARY DESTINATION ${PADDLE_ENCRYPTED_LIB_PATH}
LIBRARY DESTINATION ${PADDLE_ENCRYPTED_LIB_PATH})

set(FEATURE_LIB "${CMAKE_SOURCE_DIR}/feature/python/libs")
install(TARGETS he_utils LIBRARY DESTINATION ${FEATURE_LIB})

if (WITH_PSI)
install(TARGETS psi LIBRARY DESTINATION ${PADDLE_ENCRYPTED_LIB_PATH})
Expand Down
1 change: 1 addition & 0 deletions cmake/external/grpc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc)
SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
SET(GRPC_PYTHON_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_python_plugin" CACHE FILEPATH "GRPC_PYTHON_PLUGIN" FORCE)

include(ProcessorCount)
ProcessorCount(NUM_OF_PROCESSOR)
Expand Down
30 changes: 30 additions & 0 deletions cmake/generic.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,36 @@ function(py_proto_compile TARGET_NAME)
add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs} protobuf)
endfunction()

# py_proto_grpc_compile(<target> PROTO <file.proto> [PROTO_PATH <import-root>])
#
# Generates the Python protobuf module (<name>_pb2.py) and the Python gRPC
# module (<name>_pb2_grpc.py) for a single .proto file and wraps both in a
# custom target that is built as part of ALL.
#
#   TARGET_NAME  name of the custom target to create
#   PROTO        the .proto file to compile (relative paths are resolved
#                against the current source directory)
#   PROTO_PATH   import root handed to protoc via -I; defaults to the
#                directory that contains PROTO when omitted
#   SRCS         accepted by the argument parser but not used by this function
#
# protoc mirrors the location of PROTO relative to PROTO_PATH underneath the
# output directory, so the generated files end up in
#   ${CMAKE_CURRENT_BINARY_DIR}/<dir of PROTO relative to PROTO_PATH>/
# The OUTPUT of the custom command has to name exactly those files; otherwise
# the rule is considered out of date on every build.
function(py_proto_grpc_compile TARGET_NAME)
  set(options "")
  set(oneValueArgs PROTO PROTO_PATH)
  set(multiValueArgs SRCS)
  cmake_parse_arguments(py_proto_grpc_compile "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if(NOT py_proto_grpc_compile_PROTO)
    message(FATAL_ERROR "py_proto_grpc_compile(${TARGET_NAME}): PROTO is required")
  endif()

  get_filename_component(ABS_PROTO "${py_proto_grpc_compile_PROTO}" ABSOLUTE)
  get_filename_component(PROTO_WE "${py_proto_grpc_compile_PROTO}" NAME_WE)
  get_filename_component(PROTO_DIR "${ABS_PROTO}" DIRECTORY)

  # Fall back to the proto's own directory when no import root was given,
  # instead of passing an empty -I argument to protoc.
  if(py_proto_grpc_compile_PROTO_PATH)
    get_filename_component(PROTO_PATH "${py_proto_grpc_compile_PROTO_PATH}" ABSOLUTE)
  else()
    set(PROTO_PATH "${PROTO_DIR}")
  endif()

  # Sub-directory (relative to the import root) in which protoc will place
  # the generated modules.
  file(RELATIVE_PATH proto_rel_dir "${PROTO_PATH}" "${PROTO_DIR}")
  if(IS_ABSOLUTE "${proto_rel_dir}" OR "${proto_rel_dir}" MATCHES "^\\.\\.(/|$)")
    message(FATAL_ERROR
      "py_proto_grpc_compile(${TARGET_NAME}): ${ABS_PROTO} is not located below PROTO_PATH ${PROTO_PATH}")
  endif()

  set(py_out_dir "${CMAKE_CURRENT_BINARY_DIR}")
  if(NOT "${proto_rel_dir}" STREQUAL "" AND NOT "${proto_rel_dir}" STREQUAL ".")
    set(py_out_dir "${py_out_dir}/${proto_rel_dir}")
  endif()

  set(grpc_proto_py "${py_out_dir}/${PROTO_WE}_pb2.py")
  set(grpc_grpc_py "${py_out_dir}/${PROTO_WE}_pb2_grpc.py")
  set(py_srcs "${grpc_proto_py}" "${grpc_grpc_py}")

  # First invocation emits <name>_pb2_grpc.py through the gRPC Python plugin,
  # the second one emits the plain protobuf module <name>_pb2.py.
  # The --plugin option is passed as one fully quoted argument so that no
  # literal quote characters reach protoc under VERBATIM.
  add_custom_command(
    OUTPUT "${grpc_proto_py}" "${grpc_grpc_py}"
    COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
            --grpc_python_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}"
            "--plugin=protoc-gen-grpc_python=${GRPC_PYTHON_PLUGIN}" "${ABS_PROTO}"
    COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
            --python_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}"
            "${ABS_PROTO}"
    DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc
    COMMENT "Generating Python protobuf/gRPC modules for ${PROTO_WE}.proto"
    VERBATIM)

  add_custom_target(${TARGET_NAME} ALL DEPENDS ${py_srcs} protobuf extern_grpc)
endfunction()

function(py_test TARGET_NAME)
if(WITH_TESTING)
set(options "")
Expand Down
24 changes: 24 additions & 0 deletions core/he/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Builds the he_utils Python extension module and generates the Python
# protobuf/gRPC modules for metrics.proto, then installs both into the
# python/paddle_fl/feature_engineering package inside the source tree.

set(PYBIND_HE_SRCS
  "./he_utils.cc"
  "./paillier.cc"
)

# Only search for the Python headers when the caller has not supplied
# PYTHON_INCLUDE_DIRS (e.g. via -DPYTHON_INCLUDE_DIRS=... on the command line).
if (NOT PYTHON_INCLUDE_DIRS)
  find_package(PythonLibs REQUIRED)
endif()

add_library(he_utils MODULE ${PYBIND_HE_SRCS})
# Attach the Python headers to he_utils only, rather than to every target
# declared in this directory.
target_include_directories(he_utils PRIVATE ${PYTHON_INCLUDE_DIRS})
target_link_libraries(he_utils PRIVATE pybind gmp gmpxx)
# NOTE(review): PYTHON_MODULE_PREFIX is not set in this file; presumably it is
# provided by the pybind setup elsewhere in the project - confirm.
set_target_properties(he_utils PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}")

# Generate metrics_pb2.py / metrics_pb2_grpc.py. Because the import root is
# <source>/python, the generated files are looked up below
# ${CMAKE_CURRENT_BINARY_DIR}/${FE_PROTO_MODULE_PATH} by the install rule.
set(FE_PROTO_MODULE_PATH "paddle_fl/feature_engineering/proto")
py_proto_grpc_compile(my_target
  PROTO "${CMAKE_SOURCE_DIR}/python/${FE_PROTO_MODULE_PATH}/metrics.proto"
  PROTO_PATH "${CMAKE_SOURCE_DIR}/python")
install(
  FILES
    "${CMAKE_CURRENT_BINARY_DIR}/${FE_PROTO_MODULE_PATH}/metrics_pb2.py"
    "${CMAKE_CURRENT_BINARY_DIR}/${FE_PROTO_MODULE_PATH}/metrics_pb2_grpc.py"
  DESTINATION "${CMAKE_SOURCE_DIR}/python/${FE_PROTO_MODULE_PATH}")

# The extension module is installed next to the Python package sources.
set(FEATURE_LIB "${CMAKE_SOURCE_DIR}/python/paddle_fl/feature_engineering/libs")
install(TARGETS he_utils LIBRARY DESTINATION "${FEATURE_LIB}")
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 0 additions & 14 deletions feature/he/CMakeLists.txt

This file was deleted.

54 changes: 0 additions & 54 deletions feature/python/example/README.md

This file was deleted.

95 changes: 95 additions & 0 deletions python/paddle_fl/feature_engineering/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
## 联邦特征工程

支持计算正样本占比、woe、iv

## 单独编译

### 环境准备
* CentOS 7 (64 bit) or Ubuntu 16.04
* Python 3.5/3.6/3.7 (64 bit) or above
* pip3 9.0.1+ (64 bit)
* GCC or G++ 8.2.0+
* cmake 3.15+
* grpcio
* grpcio-tools


### 克隆源码并安装

1.获取源代码
```sh
git clone https://github.com/PaddlePaddle/PaddleFL
cd /path/to/PaddleFL
mkdir build && cd build
```

2.执行部分编译指令(参照 docs/source/md/compile_and_install_cn.md )

```
cmake .. -DCMAKE_C_COMPILER=${gcc_path} -DCMAKE_CXX_COMPILER=${g++_path} -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} -DPYTHON_INCLUDE_DIRS=${PYTHON_INCLUDE_DIRS} -DBUILD_PADDLE_FROM_SOURCE=ON -DWITH_GRPC=ON -DWITH_GPU=OFF
```

```
cd core/he
make -j48
make install
```

3.pip打包并安装
```
cd /path/to/PaddleFL/python/paddle_fl
mkdir build && cd build
python3 ../feature_engineering/setup.py sdist bdist_wheel
pip3 install dist/paddle_fl_feature_engineering-1.2.0-py3-none-any.whl -U
```
## 跟随paddlefl编译
不久后将支持

## 测试

1.准备数据
```
cd /path/to/PaddleFL/python/paddle_fl/feature_engineering/example
python3 gen_test_file.py
```
简单测试使用 gen_simple_file,性能测试使用 gen_bench_file。

2.生成证书
生成 grpc 证书(grpc secure channel 需要):

```
openssl req -newkey rsa:2048 -nodes -keyout server.key -x509 -days 3650 -out server.crt
```
示例中将 Common Name 定义为 metrics_service,其余字段留空。

在 example 目录下会生成 server.key 和 server.crt。

3.进行测试

服务器端:python3 metrics_test_server.py

客户端: python3 metrics_test_client.py

## 构建自己的程序

我们提供了pip打包支持,用户只需在自己的程序中 import paddle_fl.feature_engineering.core 即可,grpc通信模块可由用户自定义

示例如下:

channel: grpc client channel 自定义

server: grpc server 自定义

```
#client
from paddle_fl.feature_engineering.core.federated_feature_engineering_client import FederatedFeatureEngineeringClient
fed_fea_eng_client = FederatedFeatureEngineeringClient(1024)
fed_fea_eng_client.connect(channel)
result = fed_fea_eng_client.get_woe(labels)
#server
from paddle_fl.feature_engineering.core.federated_feature_engineering_server import FederatedFeatureEngineeringServer
fed_fea_eng_server = FederatedFeatureEngineeringServer()
fed_fea_eng_server.serve(server)
woe_list = fed_fea_eng_server.get_woe(features)
```
28 changes: 28 additions & 0 deletions python/paddle_fl/feature_engineering/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Import modules.
"""

import os
import subprocess
import sys
import sysconfig
import warnings

# Directory inside site-packages that holds the he_utils extension module.
he_utils_path = sysconfig.get_paths()["purelib"] + "/paddle_fl/feature_engineering/libs"
he_utils_lib = he_utils_path + '/he_utils.so'

# Make `import he_utils` resolvable for the modules of this package.
sys.path.append(he_utils_path)

# Point the RPATH of he_utils.so at its own directory (presumably so that
# shared libraries shipped next to it can be resolved - confirm).
# patchelf is invoked with an argument list instead of a shell command line,
# so install prefixes containing spaces or shell metacharacters are passed
# through intact. The step stays best effort: the exit status is ignored and
# a missing patchelf binary only produces a warning, never an import failure.
try:
    subprocess.call(['patchelf', '--set-rpath', he_utils_path, he_utils_lib])
except OSError as err:
    warnings.warn('could not run patchelf on {}: {}'.format(he_utils_lib, err))

from . import core

Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,9 @@
"""
init
"""
from .federal_feature_engineering_client import FederalFeatureEngineeringClient
from .federal_feature_engineering_server import FederalFeatureEngineeringServer

from .federated_feature_engineering_client import FederatedFeatureEngineeringClient
from .federated_feature_engineering_server import FederatedFeatureEngineeringServer

from .metrics_client import *
from .metrics_server import *
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License
"""
Federal feature engineering client-side
Federated feature engineering client-side
support postive_ratio, woe, iv, ks, auc
"""

import metrics_client as mc
from . import metrics_client as mc

class FederalFeatureEngineeringClient(object):
class FederatedFeatureEngineeringClient(object):
"""
Federal feature engineering client-side implementation
Federated feature engineering client-side implementation
"""
def __init__(self, key_len=1024):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License
"""
Federal feature engineering server-side interface
Federated feature engineering server-side interface
support postive_ratio, woe, iv, ks, auc
"""

import threading
import metrics_server as ms
from . import metrics_server as ms

class FederalFeatureEngineeringServer(object):
class FederatedFeatureEngineeringServer(object):
"""
Federal feature engineering server-side implementation
Federated feature engineering server-side implementation
"""
def serve(self, server):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
import numpy as np
import grpc

import sys
sys.path.append("../libs")

import he_utils as hu
import metrics_pb2
import metrics_pb2_grpc
from ..proto import metrics_pb2_grpc
from ..proto import metrics_pb2

__all__ = [
'get_mpc_postive_ratio_alice',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@
import numpy as np
import grpc

import sys
sys.path.append("../libs")

import he_utils as hu
import metrics_pb2
import metrics_pb2_grpc
from ..proto import metrics_pb2
from ..proto import metrics_pb2_grpc


class MpcPositiveRatioServicer(metrics_pb2_grpc.MpcPositiveRatioServicer):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 524c3dd

Please sign in to comment.