Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Scale quantile via DAAL #644

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions sdc/_daal.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
//*****************************************************************************
// Copyright (c) 2020, Intel Corporation All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
// EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//*****************************************************************************

#include <Python.h>

#include <cstdint>

#include <daal.h>


using namespace daal;
using namespace daal::algorithms;
using namespace daal::data_management;
using namespace daal::services;


extern "C"
{

// Smoke-test entry point for the extension: returns its argument plus 42.
int test(int x)
{
    const int offset = 42;
    return x + offset;
}

// Adds up the first c doubles stored at p, in index order.
// Returns 0.0 when c is not positive (p is not read in that case).
double sum(double *p, int c)
{
    double total = 0.0;
    int idx = 0;
    while (idx < c)
    {
        total += p[idx];
        ++idx;
    }
    return total;
}

double median(int nRows, double *ptr)
{
quantiles::Batch<double> algorithm;
algorithm.input.set(quantiles::data, HomogenNumericTable<double>::create(ptr, 1, nRows));
algorithm.compute();
return algorithm.getResult()->get(quantiles::quantiles)->getValue<double>(0, 0);
}

// Computes quantiles of `data` with DAAL's batch quantiles algorithm.
//
//   data        - input buffer, handed to
//                 HomogenNumericTable<double>::create(data, nFeatures, nVectors)
//   nFeatures   - first size argument of the input table
//   nVectors    - second size argument of the input table
//   quantOrderN - number of entries in quantOrder and in quants
//   quantOrder  - requested quantile orders
//   quants      - output buffer; receives quantOrderN values
//
// The integer parameters use the standard std::int64_t instead of the
// implementation-internal __int64_t so the file also builds with toolchains
// that do not provide that typedef.
void quantile(const double * data, const std::int64_t nFeatures, const std::int64_t nVectors,
              const std::int64_t quantOrderN, const double * quantOrder,
              double * quants)
{
    // NOTE(review): this changes a process-wide DAAL setting on every call and
    // the thread count is hard-coded; the original author's remark was
    // "does not affect". Consider removing it or making it configurable.
    Environment::getInstance()->setNumberOfThreads(4);

    // The explicit <double> instantiation was measured by the original author
    // to be about two times faster than the default Batch<>.
    quantiles::Batch<double> algorithm;

    // NOTE(review): the orders table is created with sizes (1, quantOrderN) and
    // the result is read back with getValue(0, i). Confirm the column/row
    // orientation DAAL expects for quantileOrders and for the result table.
    algorithm.parameter.quantileOrders = HomogenNumericTable<double>::create(quantOrder, 1, quantOrderN);

    algorithm.input.set(quantiles::data, HomogenNumericTable<double>::create(data, nFeatures, nVectors));

    algorithm.compute();

    auto out_table = algorithm.getResult()->get(quantiles::quantiles);

    // The index has the same 64-bit type as the count, so the comparison is
    // neither truncated nor mixed-width.
    for (std::int64_t i = 0; i < quantOrderN; ++i)
    {
        quants[i] = out_table->getValue<double>(0, static_cast<size_t>(i));
    }
}

// Module initialisation for the `daal` extension.
//
// The module exposes no Python callables. Each native function (test, sum,
// median, quantile) is published as an integer attribute holding its address,
// so that Python code can bind to it by pointer.
//
// Returns the new module, or NULL with a Python exception set on failure.
PyMODINIT_FUNC PyInit_daal()
{
    static struct PyModuleDef moduledef = {
        PyModuleDef_HEAD_INIT,
        "daal",
        "No docs",
        -1,
        NULL,
    };
    PyObject* m = PyModule_Create(&moduledef);
    if (m == NULL)
    {
        return NULL;
    }

    // PyObject_SetAttrString() does not steal the reference to the value, so
    // the temporary integer object is released here whether or not the
    // attribute was set.
    auto publish = [m](const char* name, void* address) -> bool
    {
        PyObject* value = PyLong_FromVoidPtr(address);
        if (value == NULL)
        {
            return false;
        }
        const int status = PyObject_SetAttrString(m, name, value);
        Py_DECREF(value);
        return status == 0;
    };

    // On any failure drop the half-initialised module and report the error.
#define REGISTER(func)                       \
    if (!publish(#func, (void*)(&func)))     \
    {                                        \
        Py_DECREF(m);                        \
        return NULL;                         \
    }
    REGISTER(test)
    REGISTER(sum)
    REGISTER(median)
    REGISTER(quantile)
#undef REGISTER

    return m;
}

} // extern "C"
69 changes: 69 additions & 0 deletions sdc/daal_overloads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import numba
import ctypes as ct

from numba import types
from numba.extending import overload

# from numba import typing, generated_jit
# from numba.extending import models, register_model
# from numba.extending import lower_builtin, overload_method, intrinsic

# from llvmlite import ir as lir
import llvmlite.binding as ll

from . import daal


# Make the native entry points of the ``daal`` extension resolvable by name
# from LLVM-generated code.
ll.add_symbol('test', daal.test)
ll.add_symbol('sum', daal.sum)


# Signatures of the native functions as seen by Numba.  The C prototypes are
# ``int test(int)`` and ``double sum(double *, int)``.  ``types.intc`` is the
# C ``int``; ``types.int_`` follows NumPy's ``int_``, which is 64-bit on most
# 64-bit platforms and therefore does not match the callee's ``int``.
_test = types.ExternalFunction("test", types.intc(types.intc))
_sum = types.ExternalFunction("sum", types.float64(types.voidptr, types.intc))


def test(x):
    """Pure-Python placeholder for the native ``test`` function.

    Called outside of compiled code it does nothing and returns ``None``;
    it exists so that an overload can be attached to it.
    """


@overload(test)
def test_overload(x):
    """Numba overload of ``test`` that forwards to the external ``_test``."""
    def test_impl(x):
        return _test(x)

    return test_impl


# ctypes prototypes for the native helpers.
# ``test`` takes and returns a C int.
functype_test = ct.CFUNCTYPE(ct.c_int, ct.c_int)
# ``sum`` returns a double; the array argument is declared as an untyped
# pointer (``c_void_p``) rather than ``POINTER(c_double)``.
functype_sum = ct.CFUNCTYPE(ct.c_double, ct.c_void_p, ct.c_int)

# Callables bound to the function addresses exported by the ``daal`` module.
ctypes_test = functype_test(daal.test)
ctypes_sum = functype_sum(daal.sum)


# ``median``: (int, void *) -> double.
_median_functype = ct.CFUNCTYPE(ct.c_double, ct.c_int, ct.c_void_p)
median = _median_functype(daal.median)

# ``quantile``: declared here as (int, void *, double) -> double.
# NOTE(review): the ``quantile`` entry point in sdc/_daal.cpp takes six
# arguments (data, nFeatures, nVectors, quantOrderN, quantOrder, quants) and
# returns void, which does not match this prototype - confirm which side is
# intended before relying on this binding.
_quantile_functype = ct.CFUNCTYPE(ct.c_double, ct.c_int, ct.c_void_p, ct.c_double)
quantile = _quantile_functype(daal.quantile)
5 changes: 3 additions & 2 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
from sdc.functions import numpy_like
from sdc.hiframes.api import isna
from sdc.datatypes.hpat_pandas_groupby_functions import init_series_groupby
from sdc.daal_overloads import quantile as daal_quantile

from .pandas_series_functions import apply
from .pandas_series_functions import map as _map
Expand Down Expand Up @@ -3361,8 +3362,8 @@ def hpat_pandas_series_quantile(self, q=0.5, interpolation='linear'):
ty_checker.raise_exc(q, 'int, float, list', 'q')

# Implementation returned for Series.quantile.
# NOTE(review): this span comes from a diff view that shows removed and added
# lines together. Presumably the ``numpy.quantile`` return is the removed line
# and the ``daal_quantile`` call is its replacement - confirm against the
# actual file. In both variants ``interpolation`` is accepted but not used.
def hpat_pandas_series_quantile_impl(self, q=0.5, interpolation='linear'):

return numpy.quantile(self._data, q)
# return numpy.quantile(self._data, q)
# Passes the element count, the ctypes view of the data buffer and ``q`` to
# the DAAL-backed binding imported as ``daal_quantile``.
return daal_quantile(len(self._data), self._data.ctypes, q)

return hpat_pandas_series_quantile_impl

Expand Down
1 change: 1 addition & 0 deletions sdc/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@

from sdc.tests.test_hpat_jit import *

from sdc.tests.test_daal import *
from sdc.tests.test_sdc_numpy import *
from sdc.tests.test_prange_utils import *

Expand Down
77 changes: 77 additions & 0 deletions sdc/tests/test_daal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# *****************************************************************************
# Copyright (c) 2020, Intel Corporation All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

import numpy as np
import ctypes

from sdc.tests.test_base import TestCase

from sdc.daal_overloads import test, ctypes_test, ctypes_sum, quantile


class TestDaal(TestCase):
    """Checks of the DAAL extension bindings, called directly and from jitted code."""

    def test_test(self):
        """``test(10)`` gives the same value through the overload and through ctypes."""
        def call_overload():
            return test(10)

        def call_ctypes():
            return ctypes_test(10)

        jitted_overload = self.jit(call_overload)
        jitted_ctypes = self.jit(call_ctypes)

        # The plain-Python ``test`` is only a stub, so the ctypes call made
        # from the interpreter serves as the reference value.
        self.assertEqual(jitted_overload(), call_ctypes())
        self.assertEqual(jitted_ctypes(), call_ctypes())

    def test_sum(self):
        """The native ``sum`` matches ``np.sum`` from the interpreter and from jitted code."""
        def native_sum(values):
            return ctypes_sum(values.ctypes, len(values))

        jitted_sum = self.jit(native_sum)

        data = np.arange(10, dtype=np.float64)
        expected = np.sum(data)

        for impl in (native_sum, jitted_sum):
            self.assertEqual(impl(data), expected)

    def test_quantile(self):
        """The native ``quantile`` matches ``np.quantile`` for several orders."""
        def native_quantile(values, order):
            return quantile(len(values), values.ctypes, order)

        jitted_quantile = self.jit(native_quantile)

        data = np.arange(10, dtype=np.float64)

        for q in (0., 0.25, 0.5, 0.75, 1.):
            with self.subTest(q=q):
                expected = np.quantile(data, q)
                self.assertEqual(native_quantile(data, q), expected)
                self.assertEqual(jitted_quantile(data, q), expected)
18 changes: 18 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,24 @@ def readme():
if _has_opencv:
_ext_mods.append(ext_cv_wrapper)

# Root of the DAAL installation: the directory that holds ``include`` and the
# platform-specific ``lib`` folders used below.  It is taken from the DAALROOT
# environment variable when set, then from the active conda environment
# (CONDA_PREFIX); the literal path is kept only as a last-resort default so
# existing setups keep building.
daal_root = (
    os.environ.get('DAALROOT')
    or os.environ.get('CONDA_PREFIX')
    or "/localdisk/spokhode/miniconda3/envs/sdc-env"
)

# Native module that exports the DAAL-backed helpers from sdc/_daal.cpp.
ext_daal = Extension(name="sdc.daal",
                     sources=["sdc/_daal.cpp"],
                     include_dirs=[os.path.join(daal_root, 'include')],
                     libraries=['daal_core', 'daal_thread'],
                     library_dirs=[
                         # for Linux
                         os.path.join(daal_root, 'lib', 'intel64', 'gcc4.4'),
                         # for MacOS
                         os.path.join(daal_root, 'lib'),
                         # for Windows
                         os.path.join(daal_root, 'lib', 'intel64', 'vc_mt'),
                     ],
                     language="c++",
                     )

_ext_mods.append(ext_daal)

class style(Command):
""" Command to check and adjust code style
Expand Down