From ed7687f5b3f3e5dfa0d2824f4a06f21b99dc7454 Mon Sep 17 00:00:00 2001 From: drculhane Date: Thu, 14 Nov 2024 09:13:12 -0500 Subject: [PATCH 1/2] ReadyForPR --- arkouda/numpy/_numeric.py | 148 +++++----- arkouda/pdarrayclass.py | 20 +- src/EfuncMsg.chpl | 524 +++++++++++------------------------- tests/numpy/numeric_test.py | 56 +++- 4 files changed, 283 insertions(+), 465 deletions(-) diff --git a/arkouda/numpy/_numeric.py b/arkouda/numpy/_numeric.py index 1c4171e255..e25cf975e4 100644 --- a/arkouda/numpy/_numeric.py +++ b/arkouda/numpy/_numeric.py @@ -3,13 +3,13 @@ from typing import TYPE_CHECKING, List, Sequence, Tuple, TypeVar, Union from typing import cast as type_cast from typing import no_type_check - +from arkouda.groupbyclass import groupable import numpy as np from typeguard import typechecked from arkouda.client import generic_msg from arkouda.dtypes import str_ as akstr_ -from arkouda.groupbyclass import GroupBy, groupable +from arkouda.groupbyclass import GroupBy from arkouda.numpy.dtypes import DTypes, bigint from arkouda.numpy.dtypes import bool_ as ak_bool from arkouda.numpy.dtypes import dtype as akdtype @@ -26,13 +26,7 @@ from arkouda.numpy.dtypes import _datatype_check from arkouda.pdarrayclass import all as ak_all from arkouda.pdarrayclass import any as ak_any -from arkouda.pdarrayclass import ( - argmax, - broadcast_if_needed, - create_pdarray, - pdarray, - sum, -) +from arkouda.pdarrayclass import argmax, broadcast_if_needed, create_pdarray, pdarray, sum from arkouda.pdarraycreation import array, linspace, scalar_array from arkouda.sorting import sort from arkouda.strings import Strings @@ -119,7 +113,6 @@ def _merge_where(new_pda, where, ret): new_pda[where] = ret return new_pda - @typechecked def cast( pda: Union[pdarray, Strings, Categorical], # type: ignore @@ -249,10 +242,9 @@ def abs(pda: pdarray) -> pdarray: array([5, 4, 3, 2, 1]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"abs<{pda.dtype},{pda.ndim}>", args={ - "func": 
"abs", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -282,10 +274,10 @@ def ceil(pda: pdarray) -> pdarray: >>> ak.ceil(ak.linspace(1.1,5.5,5)) array([2, 3, 4, 5, 6]) """ + _datatype_check(pda.dtype, [float], 'ceil') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"ceil<{pda.dtype},{pda.ndim}>", args={ - "func": "ceil", "array": pda, }, ) @@ -316,11 +308,11 @@ def floor(pda: pdarray) -> pdarray: >>> ak.floor(ak.linspace(1.1,5.5,5)) array([1, 2, 3, 4, 5]) """ + _datatype_check(pda.dtype, [float], 'floor') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"floor<{pda.dtype},{pda.ndim}>", args={ - "func": "floor", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -350,11 +342,11 @@ def round(pda: pdarray) -> pdarray: >>> ak.round(ak.array([1.1, 2.5, 3.14159])) array([1, 3, 3]) """ + _datatype_check(pda.dtype, [float], 'round') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"round<{pda.dtype},{pda.ndim}>", args={ - "func": "round", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -384,15 +376,17 @@ def trunc(pda: pdarray) -> pdarray: >>> ak.trunc(ak.array([1.1, 2.5, 3.14159])) array([1, 2, 3]) """ + _datatype_check(pda.dtype, [float], 'trunc') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"trunc<{pda.dtype},{pda.ndim}>", args={ - "func": "trunc", "array": pda, }, ) return create_pdarray(type_cast(str, repMsg)) +# Noted during Sept 2024 rewrite of EfuncMsg.chpl -- although it's "sign" here, inside the +# chapel code, it's "sgn" @typechecked def sign(pda: pdarray) -> pdarray: @@ -418,11 +412,11 @@ def sign(pda: pdarray) -> pdarray: >>> ak.sign(ak.array([-10, -5, 0, 5, 10])) array([-1, -1, 0, 1, 1]) """ + _datatype_check(pda.dtype, [int, float], 'sign') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"sgn<{pda.dtype},{pda.ndim}>", args={ - "func": "sign", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, 
repMsg)) @@ -456,10 +450,9 @@ def isfinite(pda: pdarray) -> pdarray: array([True, True, False]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"isfinite<{pda.ndim}>", args={ - "func": "isfinite", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -493,10 +486,9 @@ def isinf(pda: pdarray) -> pdarray: array([False, False, True]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"isinf<{pda.ndim}>", args={ - "func": "isinf", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -539,10 +531,9 @@ def isnan(pda: pdarray) -> pdarray: raise TypeError("isnan only supports pdarray of numeric type.") repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"isnan<{pda.ndim}>", args={ - "func": "isnan", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -586,82 +577,78 @@ def log(pda: pdarray) -> pdarray: array([0, 3.3219280948873626, 6.6438561897747253]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"log<{pda.dtype},{pda.ndim}>", args={ - "func": "log", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @typechecked -def log10(x: pdarray) -> pdarray: +def log10(pda: pdarray) -> pdarray: """ Return the element-wise base 10 log of the array. Parameters __________ - x : pdarray - array to compute on + pda : pdarray + array to compute on Returns _______ pdarray contain values of the base 10 log """ repMsg = generic_msg( - cmd=f"efunc{x.ndim}D", + cmd=f"log10<{pda.dtype},{pda.ndim}>", args={ - "func": "log10", - "array": x, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @typechecked -def log2(x: pdarray) -> pdarray: +def log2(pda: pdarray) -> pdarray: """ Return the element-wise base 2 log of the array. 
Parameters __________ - x : pdarray - array to compute on + pda : pdarray + array to compute on Returns _______ pdarray contain values of the base 2 log """ repMsg = generic_msg( - cmd=f"efunc{x.ndim}D", + cmd=f"log2<{pda.dtype},{pda.ndim}>", args={ - "func": "log2", - "array": x, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @typechecked -def log1p(x: pdarray) -> pdarray: +def log1p(pda: pdarray) -> pdarray: """ Return the element-wise natural log of one plus the array. Parameters __________ - x : pdarray - array to compute on + pda : pdarray + array to compute on Returns _______ pdarray contain values of the natural log of one plus the array """ repMsg = generic_msg( - cmd=f"efunc{x.ndim}D", + cmd=f"log1p<{pda.dtype},{pda.ndim}>", args={ - "func": "log1p", - "array": x, + "pda": pda, }, ) return create_pdarray(repMsg) @@ -697,10 +684,9 @@ def exp(pda: pdarray) -> pdarray: 33.494295836924771, 13.478894913238722]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"exp<{pda.dtype},{pda.ndim}>", args={ - "func": "exp", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -736,10 +722,9 @@ def expm1(pda: pdarray) -> pdarray: 32.494295836924771, 12.478894913238722]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"expm1<{pda.dtype},{pda.ndim}>", args={ - "func": "expm1", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -771,10 +756,9 @@ def square(pda: pdarray) -> pdarray: array([1, 4, 9, 16]) """ repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"square<{pda.dtype},{pda.ndim}>", args={ - "func": "square", - "array": pda, + "pda": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -815,11 +799,11 @@ def cumsum(pda: pdarray) -> pdarray: >>> ak.cumsum(ak.randint(0, 1, 5, dtype=ak.bool_)) array([0, 1, 1, 2, 3]) """ + _datatype_check(pda.dtype, [int, float, ak_uint64, ak_bool], 'cumsum') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + 
cmd=f"cumsum<{pda.dtype},{pda.ndim}>", args={ - "func": "cumsum", - "array": pda, + "x": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -857,11 +841,11 @@ def cumprod(pda: pdarray) -> pdarray: array([1.5728783400481925, 7.0472855509390593, 33.78523998586553, 134.05309592737584, 450.21589865655358]) """ + _datatype_check(pda.dtype, [int, float, ak_uint64, ak_bool], 'cumprod') repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"cumprod<{pda.dtype},{pda.ndim}>", args={ - "func": "cumprod", - "array": pda, + "x": pda, }, ) return create_pdarray(type_cast(str, repMsg)) @@ -1373,7 +1357,7 @@ def rad2deg(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: elif where is False: return pda else: - return _merge_where(pda[:], where, 180 * (pda[where] / np.pi)) + return _merge_where(pda[:], where, 180*(pda[where]/np.pi)) @typechecked @@ -1405,7 +1389,7 @@ def deg2rad(pda: pdarray, where: Union[bool, pdarray] = True) -> pdarray: elif where is False: return pda else: - return _merge_where(pda[:], where, (np.pi * pda[where] / 180)) + return _merge_where(pda[:], where, (np.pi*pda[where]/180)) def _hash_helper(a): @@ -1532,13 +1516,14 @@ def hash( def _hash_single(pda: pdarray, full: bool = True): if pda.dtype == bigint: return hash(pda.bigint_to_uint_arrays()) + _datatype_check (pda.dtype, [float, int, ak_uint64], 'hash') + hname = "hash128" if full else "hash64" repMsg = type_cast( str, generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"{hname}<{pda.dtype},{pda.ndim}>", args={ - "func": "hash128" if full else "hash64", - "array": pda, + "x": pda, }, ), ) @@ -2599,19 +2584,18 @@ def matmul(pdaLeft: pdarray, pdaRight: pdarray): """ if pdaLeft.ndim != pdaRight.ndim: raise ValueError("matmul requires matrices of matching rank.") - cmd = f"matmul<{pdaLeft.dtype},{pdaRight.dtype},{pdaLeft.ndim}>" args = { "x1": pdaLeft, "x2": pdaRight, } - repMsg = generic_msg( - cmd=cmd, - args=args, + return create_pdarray( + generic_msg( + cmd=cmd, + args=args, + ) ) - return 
create_pdarray(repMsg) - def vecdot(x1: pdarray, x2: pdarray): """ diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index 188bdaffda..24b2b5f453 100644 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -3527,10 +3527,9 @@ def popcount(pda: pdarray) -> pdarray: return sum(popcount(a) for a in pda.bigint_to_uint_arrays()) # type: ignore else: repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"popcount<{pda.dtype},{pda.ndim}>", args={ - "func": "popcount", - "array": pda, + "pda": pda, }, ) return create_pdarray(repMsg) @@ -3568,10 +3567,9 @@ def parity(pda: pdarray) -> pdarray: return reduce(lambda x, y: x ^ y, [parity(a) for a in pda.bigint_to_uint_arrays()]) else: repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"parity<{pda.dtype},{pda.ndim}>", args={ - "func": "parity", - "array": pda, + "pda": pda, }, ) return create_pdarray(repMsg) @@ -3636,10 +3634,9 @@ def clz(pda: pdarray) -> pdarray: return lz else: repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"clz<{pda.dtype},{pda.ndim}>", args={ - "func": "clz", - "array": pda, + "pda": pda, }, ) return create_pdarray(repMsg) @@ -3706,10 +3703,9 @@ def ctz(pda: pdarray) -> pdarray: return tz else: repMsg = generic_msg( - cmd=f"efunc{pda.ndim}D", + cmd=f"ctz<{pda.dtype},{pda.ndim}>", args={ - "func": "ctz", - "array": pda, + "pda": pda, }, ) return create_pdarray(repMsg) diff --git a/src/EfuncMsg.chpl b/src/EfuncMsg.chpl index 44e8f56c19..09321700e6 100644 --- a/src/EfuncMsg.chpl +++ b/src/EfuncMsg.chpl @@ -51,423 +51,227 @@ module EfuncMsg proc sine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return sin(x); } - proc sine (x : [?d] ?t) : [d] real throws - { throw new Error ("sin does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="cos") proc cosine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return cos(x); } - proc cosine (x : [?d] ?t) : [d] real throws - { throw new Error ("cos does not 
support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="tan") proc tangent (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return tan(x); } - proc tangent (x : [?d] ?t) : [d] real throws - { throw new Error ("tan does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="arcsin") proc arcsine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return asin(x); } - proc arcsine (x : [?d] ?t) : [d] real throws - { throw new Error ("arcsin does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="arccos") proc arccosine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return acos(x); } - proc arccosine (x : [?d] ?t) : [d] real throws - { throw new Error ("arccos does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="arctan") proc arctangent (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return atan(x); } - proc arctangent (x : [?d] ?t) : [d] real throws - { throw new Error ("arctan does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="sinh") proc hypsine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return sinh(x); } - proc hypsine (x : [?d] ?t) : [d] real throws - { throw new Error ("sinh does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="cosh") proc hypcosine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return cosh(x); } - proc hypcosine (x : [?d] ?t) : [d] real throws - { throw new Error ("cosh does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="tanh") proc hyptangent (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return tanh(x); } - proc hyptangent (x : [?d] ?t) : [d] real throws - { throw new Error ("tanh does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="arcsinh") proc 
archypsine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return asinh(x); } - proc archypsine (x : [?d] ?t) : [d] real throws - { throw new Error ("arcsinh does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="arccosh") proc archypcosine (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return acosh(x); } - proc archypcosine (x : [?d] ?t) : [d] real throws - { throw new Error ("arccosh does not support type %s".format(type2str(t))) ; } - @arkouda.registerCommand (name="arctanh") proc archyptangent (x : [?d] ?t) : [d] real throws where (t==int || t==real || t==uint) { return atanh(x); } - proc archyptangent (x : [?d] ?t) : [d] real throws - { throw new Error ("arctanh does not support type %s".format(type2str(t))) ; } + @arkouda.registerCommand(name="abs") + proc absolut (const ref pda : [?d] ?t) : [d] t throws + where (t==int || t==real) { return abs(pda) ; } // TODO maybe: allow uint and return pda -// End of rewrite section -- delete this comment after all of EfuncMsg is rewritten. 
+ @arkouda.registerCommand(name="square") + proc boxy (const ref pda : [?d] ?t) : [d] t throws + where (t==int || t==real || t==uint) { return square(pda) ; } - @arkouda.registerND - proc efuncMsg(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, param nd: int): MsgTuple throws { - param pn = Reflection.getRoutineName(); - var repMsg: string; // response message; attributes of returned array(s) will be appended to this string - var name = msgArgs.getValueOf("array"); - var efunc = msgArgs.getValueOf("func"); - var rname = st.nextName(); + @arkouda.registerCommand(name="exp") + proc expo (const ref pda : [?d] ?t) : [d] real throws + where (t==int || t==real || t==uint) { return exp(pda) ; } - var gEnt: borrowed GenSymEntry = getGenericTypedArrayEntry(name, st); + @arkouda.registerCommand(name="expm1") + proc expom (const ref pda : [?d] ?t) : [d] real throws + where (t==int || t==real || t==uint) { return expm1(pda) ; } - eLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), - "cmd: %s efunc: %s pdarray: %s".format(cmd,efunc,st.attrib(name))); - - select (gEnt.dtype) { - when (DType.Int64) { - var e = toSymEntry(gEnt,int, nd); - ref ea = e.a; - select efunc - { - when "abs" { - st.addEntry(rname, new shared SymEntry(abs(ea))); - } - when "log" { - st.addEntry(rname, new shared SymEntry(log(ea))); - } - when "round" { - st.addEntry(rname, new shared SymEntry(ea)); - } - when "sgn" { - st.addEntry(rname, new shared SymEntry(sgn(ea))); - } - when "exp" { - st.addEntry(rname, new shared SymEntry(exp(ea))); - } - when "square" { - st.addEntry(rname, new shared SymEntry(square(ea))); - } - when "cumsum" { - if nd == 1 { - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(int) * e.size); - st.addEntry(rname, new shared SymEntry(+ scan e.a)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - 
eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "cumprod" { - if nd == 1 { - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(int) * e.size); - st.addEntry(rname, new shared SymEntry(* scan e.a)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "hash64" { - overMemLimit(numBytes(int) * e.size); - var a = st.addEntry(rname, e.tupShape, uint); - forall (ai, x) in zip(a.a, e.a) { - ai = sipHash64(x): uint; - } - } - when "hash128" { - overMemLimit(numBytes(int) * e.size * 2); - var rname2 = st.nextName(); - var a1 = st.addEntry(rname2, e.tupShape, uint); - var a2 = st.addEntry(rname, e.tupShape, uint); - forall (a1i, a2i, x) in zip(a1.a, a2.a, e.a) { - (a1i, a2i) = sipHash128(x): (uint, uint); - } - // Put first array's attrib in repMsg and let common - // code append second array's attrib - repMsg += "created " + st.attrib(rname2) + "+"; - } - when "popcount" { - st.addEntry(rname, new shared SymEntry(popCount(ea))); - } - when "parity" { - st.addEntry(rname, new shared SymEntry(parity(ea))); - } - when "clz" { - st.addEntry(rname, new shared SymEntry(clz(ea))); - } - when "ctz" { - st.addEntry(rname, new shared SymEntry(ctz(ea))); - } - when "not" { - st.addEntry(rname, new shared SymEntry(!e.a)); - } - otherwise { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - } - when (DType.Float64) { - var e = toSymEntry(gEnt,real, nd); - ref ea = e.a; - select efunc - { - when "abs" { - st.addEntry(rname, new shared SymEntry(abs(ea))); - } - when "ceil" { - st.addEntry(rname, new shared 
SymEntry(ceil(ea))); - } - when "floor" { - st.addEntry(rname, new shared SymEntry(floor(ea))); - } - when "round" { - st.addEntry(rname, new shared SymEntry(round(ea))); - } - when "trunc" { - st.addEntry(rname, new shared SymEntry(trunc(ea))); - } - when "sgn" { - st.addEntry(rname, new shared SymEntry(sgn(ea))); - } - when "isfinite" { - st.addEntry(rname, new shared SymEntry(isFinite(ea))); - } - when "isinf" { - st.addEntry(rname, new shared SymEntry(isInf(ea))); - } - when "isnan" { - st.addEntry(rname, new shared SymEntry(isNan(ea))); - } - when "log" { - st.addEntry(rname, new shared SymEntry(log(ea))); - } - when "log1p" { - st.addEntry(rname, new shared SymEntry(log1p(ea))); - } - when "log2" { - st.addEntry(rname, new shared SymEntry(log2(ea))); - } - when "log10" { - st.addEntry(rname, new shared SymEntry(log10(ea))); - } - when "exp" { - st.addEntry(rname, new shared SymEntry(exp(ea))); - } - when "expm1" { - st.addEntry(rname, new shared SymEntry(expm1(ea))); - } - when "square" { - st.addEntry(rname, new shared SymEntry(square(ea))); - } - when "cumsum" { - if nd == 1 { - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(real) * e.size); - st.addEntry(rname, new shared SymEntry(+ scan e.a)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "cumprod" { - if nd == 1 { - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(real) * e.size); - st.addEntry(rname, new shared SymEntry(* scan e.a)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "hash64" { - 
overMemLimit(numBytes(real) * e.size); - var a = st.addEntry(rname, e.tupShape, uint); - forall (ai, x) in zip(a.a, e.a) { - ai = sipHash64(x): uint; - } - } - when "hash128" { - overMemLimit(numBytes(real) * e.size * 2); - var rname2 = st.nextName(); - var a1 = st.addEntry(rname2, e.tupShape, uint); - var a2 = st.addEntry(rname, e.tupShape, uint); - forall (a1i, a2i, x) in zip(a1.a, a2.a, e.a) { - (a1i, a2i) = sipHash128(x): (uint, uint); - } - // Put first array's attrib in repMsg and let common - // code append second array's attrib - repMsg += "created " + st.attrib(rname2) + "+"; - } - otherwise { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - } - when (DType.Bool) { - var e = toSymEntry(gEnt,bool, nd); - select efunc - { - when "cumsum" { - if nd == 1 { - var ia = makeDistArray(e.a.domain, int); // make a copy of bools as ints blah! - ia = e.a:int; - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(int) * ia.size); - st.addEntry(rname, new shared SymEntry(+ scan ia)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "cumprod" { - if nd == 1 { - var ia = makeDistArray(e.a.domain, int); // make a copy of bools as ints blah! 
- ia = e.a:int; - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(int) * ia.size); - st.addEntry(rname, new shared SymEntry(* scan ia)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "not" { - st.addEntry(rname, new shared SymEntry(!e.a)); - } - otherwise { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } + @arkouda.registerCommand(name="log") + proc log_e (const ref pda : [?d] ?t) : [d] real throws + where (t==int || t==real || t==uint) { return log(pda) ; } + + @arkouda.registerCommand(name="log1p") + proc log_1p (const ref pda : [?d] ?t) : [d] real throws + where (t==int || t==real || t==uint) { return log1p(pda) ; } + +// chapel log2 returns ints when given ints, so the input has been cast to real. 
+ + @arkouda.registerCommand(name="log2") + proc log_2 (const ref pda : [?d] ?t) : [d] real throws + where (t==int || t==real || t==uint) { return log2(pda:real) ; } + + @arkouda.registerCommand(name="log10") + proc log_10 (const ref pda : [?d] ?t) : [d] real throws + where (t==int || t==real || t==uint) { return log10(pda) ; } + + @arkouda.registerCommand(name="isinf") + proc isinf_ (pda : [?d] real) : [d] bool { return (isInf(pda)) ; } + + @arkouda.registerCommand(name="isnan") + proc isnan_ (pda : [?d] real) : [d] bool { return (isNan(pda)) ; } + + @arkouda.registerCommand(name="isfinite") + proc isfinite_ (pda : [?d] real) : [d] bool { return (isFinite(pda)) ; } + + @arkouda.registerCommand (name="floor") + proc floor_ (pda : [?d] ?t) : [d] real throws + where (t==real) { return floor(pda); } + + @arkouda.registerCommand (name="ceil") + proc ceil_ (pda : [?d] ?t) : [d] real throws + where (t==real) { return ceil(pda); } + + @arkouda.registerCommand (name="round") + proc round_ (pda : [?d] ?t) : [d] real throws + where (t==real) { return round(pda); } + + @arkouda.registerCommand (name="trunc") + proc trunc_ (pda : [?d] ?t) : [d] real throws + where (t==real) { return trunc(pda); } + + @arkouda.registerCommand (name="popcount") + proc popcount_ (pda : [?d] ?t) : [d] t throws + where (t==int || t==uint) { return popCount(pda); } + + @arkouda.registerCommand (name="parity") + proc parity_ (pda : [?d] ?t) : [d] t throws + where (t==int || t==uint) { return parity(pda); } + + @arkouda.registerCommand (name="clz") + proc clz_ (pda : [?d] ?t) : [d] t throws + where (t==int || t==uint) { return clz(pda); } + + @arkouda.registerCommand (name="ctz") + proc ctz_ (pda : [?d] ?t) : [d] t throws + where (t==int || t==uint) { return ctz(pda); } + + @arkouda.registerCommand(name="not") + proc not_ (pda : [?d] ?t) : [d] bool throws + where (t==int || t==uint || t==bool) { return (!pda) ; } + +// cumsum and cumprod + + + proc cumspReturnType(type t) type + do return if t == bool 
then int else t; + + + @arkouda.registerCommand(name="cumsum") + proc cumsum(x : [?d] ?t) : [d] cumspReturnType(t) throws + where (t==int || t==real || t==uint || t==bool) { + if x.rank == 1 { + overMemLimit(numBytes(int) * x.size) ; + if t == bool { + var ix = makeDistArray(x.domain, int); // make a copy of bools as ints blah! + ix = x:int ; + return (+scan (ix)); + } else { + return (+scan x) ; } + } else { + throw new Error ("Over mem limit in cumsum") ; } - when (DType.UInt64) { - var e = toSymEntry(gEnt,uint, nd); - ref ea = e.a; - select efunc - { - when "popcount" { - st.addEntry(rname, new shared SymEntry(popCount(ea))); - } - when "clz" { - st.addEntry(rname, new shared SymEntry(clz(ea))); - } - when "ctz" { - st.addEntry(rname, new shared SymEntry(ctz(ea))); - } - when "round" { - st.addEntry(rname, new shared SymEntry(ea)); - } - when "sgn" { - st.addEntry(rname, new shared SymEntry(sgn(ea))); - } - when "cumsum" { - if nd == 1 { - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(uint) * e.size); - st.addEntry(rname, new shared SymEntry(+ scan e.a)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "cumprod" { - if nd == 1 { - // check there's enough room to create a copy for scan and throw if creating a copy would go over memory limit - overMemLimit(numBytes(uint) * e.size); - st.addEntry(rname, new shared SymEntry(* scan e.a)); - } else { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype,nd); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } - } - when "parity" { - st.addEntry(rname, new shared SymEntry(parity(ea))); - } - when "hash64" { - overMemLimit(numBytes(uint) * e.size); - var a = st.addEntry(rname, e.tupShape, 
uint); - forall (ai, x) in zip(a.a, e.a) { - ai = sipHash64(x): uint; - } - } - when "hash128" { - overMemLimit(numBytes(uint) * e.size * 2); - var rname2 = st.nextName(); - var a1 = st.addEntry(rname2, e.tupShape, uint); - var a2 = st.addEntry(rname, e.tupShape, uint); - forall (a1i, a2i, x) in zip(a1.a, a2.a, e.a) { - (a1i, a2i) = sipHash128(x): (uint, uint); - } - // Put first array's attrib in repMsg and let common - // code append second array's attrib - repMsg += "created " + st.attrib(rname2) + "+"; - } - when "log" { - st.addEntry(rname, new shared SymEntry(log(ea))); - } - when "exp" { - st.addEntry(rname, new shared SymEntry(exp(ea))); - } - when "square" { - st.addEntry(rname, new shared SymEntry(square(ea))); - } - when "not" { - st.addEntry(rname, new shared SymEntry(!e.a)); - } - otherwise { - var errorMsg = notImplementedError(pn,efunc,gEnt.dtype); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } + } + + @arkouda.registerCommand(name="cumprod") + proc cumprod(x : [?d] ?t) : [d] cumspReturnType(t) throws + where (t==int || t==real || t==uint || t==bool) { + if x.rank == 1 { + overMemLimit(numBytes(int) * x.size) ; + if t == bool { + var ix = makeDistArray(x.domain, int); // make a copy of bools as ints blah! + ix = x:int ; + return (*scan (ix)); + } else { + return (*scan x) ; } + } else { + throw new Error ("Over mem limit in cumprod") ; } - otherwise { - var errorMsg = unrecognizedTypeError(pn, dtype2str(gEnt.dtype)); - eLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); - return new MsgTuple(errorMsg, MsgType.ERROR); - } } - // Append instead of assign here, to allow for 2 return arrays from hash128 + + // sgn is a special case. It is the only thing that returns int(8). 
+ + @arkouda.registerCommand(name="sgn") + proc sign (pda : [?d] ?t) : [d] int(8) throws + where (t==int || t==real) { return (sgn(pda)); } + + // Hashes are more of a challenge to unhook from the old interface, but they + // have been pulled out into their own functions. + + @arkouda.instantiateAndRegister + proc hash64 (cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, type array_dtype, param array_nd: int) : MsgTuple throws + where (array_dtype==real || array_dtype==int || array_dtype==uint) { + if array_nd != 1 { + return MsgTuple.error("hash64 does not support multi-dim yet."); + } + const efunc = msgArgs.getValueOf("x"), + e = st[msgArgs["x"]]: SymEntry(array_dtype,array_nd); + const rname = st.nextName(); + overMemLimit(numBytes(array_dtype)*e.size); + var a = st.addEntry(rname, e.tupShape, uint); + forall (ai, x) in zip (a.a, e.a) { + ai = sipHash64(x) : uint ; + } + var repMsg = "created " + st.attrib(rname); + eLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return new MsgTuple(repMsg, MsgType.NORMAL); + } + proc hash64 (cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, type array_dtype, param array_nd: int) : MsgTuple throws { + return MsgTuple.error("hash64 does not support type %s".format(types2str(array_dtype))); + } + + @arkouda.instantiateAndRegister + proc hash128 (cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, type array_dtype, param array_nd: int) : MsgTuple throws + where (array_dtype==real || array_dtype==int || array_dtype==uint) { + if array_nd != 1 { + return MsgTuple.error("hash128 does not support multi-dim yet."); + } + const efunc = msgArgs.getValueOf("x"), + e = st[msgArgs["x"]]: SymEntry(array_dtype,array_nd); + const rname = st.nextName(); + var rname2 = st.nextName(); + overMemLimit(numBytes(array_dtype) * e.size * 2); + var a1 = st.addEntry(rname2, e.tupShape, uint); + var a2 = st.addEntry(rname, e.tupShape, uint); + forall (a1i, a2i, x) in zip(a1.a, a2.a, e.a) 
{ + (a1i, a2i) = sipHash128(x): (uint, uint); + } + var repMsg = "created " + st.attrib(rname2) + "+"; repMsg += "created " + st.attrib(rname); eLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); return new MsgTuple(repMsg, MsgType.NORMAL); } + proc hash128 (cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab, type array_dtype, param array_nd: int) : MsgTuple throws { + return MsgTuple.error("hash128 does not support type %s".format(types2str(array_dtype))); + } + +// End of rewrite section -- delete this comment after all of EfuncMsg is rewritten. private proc square(x) do return x * x; private proc log1p(x: real):real do return log(1.0 + x); diff --git a/tests/numpy/numeric_test.py b/tests/numpy/numeric_test.py index c9782b9bce..fa1ec73923 100644 --- a/tests/numpy/numeric_test.py +++ b/tests/numpy/numeric_test.py @@ -2,11 +2,13 @@ import numpy as np import pytest +import warnings import arkouda as ak from arkouda.client import get_max_array_rank from arkouda.dtypes import dtype as akdtype from arkouda.dtypes import str_ +from arkouda.testing import assert_almost_equivalent as ak_assert_almost_equivalent ARRAY_TYPES = [ak.int64, ak.float64, ak.bool_, ak.uint64, str_] NUMERIC_TYPES = [ak.int64, ak.float64, ak.bool_, ak.uint64] @@ -316,7 +318,9 @@ def test_histogram(self, num_type): def test_histogram_multidim(self, num_type1, num_type2): # test 2d histogram seed = 1 - ak_x, ak_y = ak.randint(1, 100, 1000, seed=seed, dtype=num_type1), ak.randint(1, 100, 1000, seed=seed + 1, dtype=num_type2) + ak_x, ak_y = ak.randint(1, 100, 1000, seed=seed, dtype=num_type1), ak.randint( + 1, 100, 1000, seed=seed + 1, dtype=num_type2 + ) np_x, np_y = ak_x.to_ndarray(), ak_y.to_ndarray() np_hist, np_x_edges, np_y_edges = np.histogram2d(np_x, np_y) ak_hist, ak_x_edges, ak_y_edges = ak.histogram2d(ak_x, ak_y) @@ -344,12 +348,16 @@ def test_histogram_multidim(self, num_type1, num_type2): assert np.allclose(np_edge.tolist(), ak_edge.to_list()) 
@pytest.mark.parametrize("num_type", NO_BOOL) - def test_log_and_exp(self, num_type): + @pytest.mark.parametrize("op", ["exp", "log", "expm1", "log2", "log10", "log1p"]) + def test_log_and_exp(self, num_type, op): na = np.linspace(1, 10, 10).astype(num_type) pda = ak.array(na, dtype=num_type) - for npfunc, akfunc in ((np.log, ak.log), (np.exp, ak.exp)): - assert np.allclose(npfunc(na), akfunc(pda).to_ndarray()) + akfunc = getattr(ak, op) + npfunc = getattr(np, op) + + ak_assert_almost_equivalent(akfunc(pda), npfunc(na)) + with pytest.raises(TypeError): akfunc(np.array([range(0, 10)]).astype(num_type)) @@ -368,6 +376,19 @@ def test_abs(self, num_type): with pytest.raises(TypeError): ak.abs(np.array([range(0, 10)]).astype(num_type)) + @pytest.mark.parametrize("num_type", NO_BOOL) + @pytest.mark.parametrize("prob_size", pytest.prob_size) + def test_square(self, prob_size, num_type): + nda = np.arange(prob_size).astype(num_type) + if num_type != ak.uint64: + nda = nda - prob_size // 2 + pda = ak.array(nda) + + assert np.allclose(np.square(nda), ak.square(pda).to_ndarray()) + + with pytest.raises(TypeError): + ak.square(np.array([range(-10, 10)]).astype(ak.bool_)) + @pytest.mark.parametrize("num_type1", NO_BOOL) @pytest.mark.parametrize("num_type2", NO_BOOL) def test_dot(self, num_type1, num_type2): @@ -549,7 +570,7 @@ def test_value_counts_error(self): def test_isnan(self): """ - Test efunc `isnan`; it returns a pdarray of element-wise T/F values for whether it is NaN + Test isnan; it returns a pdarray of element-wise T/F values for whether it is NaN (not a number) """ npa = np.array([1, 2, None, 3, 4], dtype="float64") @@ -565,6 +586,19 @@ def test_isnan(self): with pytest.raises(TypeError): ak.isnan(ark_s_string) + def test_isinf_isfinite(self): + """ + Test isinf and isfinite. These return pdarrays of T/F values as appropriate. 
+ """ + nda = np.array([0, 9999.9999]) + pda = ak.array(nda) + warnings.filterwarnings("ignore") + nda_blowup = np.exp(nda) + warnings.filterwarnings("default") + pda_blowup = ak.exp(pda) + assert (np.isinf(nda_blowup) == ak.isinf(pda_blowup).to_ndarray()).all() + assert (np.isfinite(nda_blowup) == ak.isfinite(pda_blowup).to_ndarray()).all() + def test_str_cat_cast(self): test_strs = [ ak.array([f"str {i}" for i in range(101)]), @@ -881,7 +915,7 @@ def test_tril(self, data_type, prob_size): # ints and bools are checked for equality; floats are checked for closeness - check = lambda a, b, t: ( + check = lambda a, b, t: ( # noqa: E731 np.allclose(a.tolist(), b.tolist()) if akdtype(t) == "float64" else (a == b).all() ) @@ -906,7 +940,7 @@ def test_triu(self, data_type, prob_size): # ints and bools are checked for equality; floats are checked for closeness - check = lambda a, b, t: ( + check = lambda a, b, t: ( # noqa: E731 np.allclose(a.tolist(), b.tolist()) if akdtype(t) == "float64" else (a == b).all() ) @@ -932,7 +966,7 @@ def test_transpose(self, data_type, prob_size): # ints and bools are checked for equality; floats are checked for closeness - check = lambda a, b, t: ( + check = lambda a, b, t: ( # noqa: E731 np.allclose(a.tolist(), b.tolist()) if akdtype(t) == "float64" else (a == b).all() ) @@ -955,7 +989,7 @@ def test_eye(self, data_type, prob_size): # ints and bools are checked for equality; floats are checked for closeness - check = lambda a, b, t: ( + check = lambda a, b, t: ( # noqa: E731 np.allclose(a.tolist(), b.tolist()) if akdtype(t) == "float64" else (a == b).all() ) @@ -979,7 +1013,7 @@ def test_matmul(self, data_type1, data_type2, prob_size): # ints and bools are checked for equality; floats are checked for closeness - check = lambda a, b, t: ( + check = lambda a, b, t: ( # noqa: E731 np.allclose(a.tolist(), b.tolist()) if akdtype(t) == "float64" else (a == b).all() ) @@ -1008,7 +1042,7 @@ def test_vecdot(self, data_type1, data_type2, prob_size): # 
ints and bools are checked for equality; floats are checked for closeness - check = lambda a, b, t: ( + check = lambda a, b, t: ( # noqa: E731 np.allclose(a.tolist(), b.tolist()) if akdtype(t) == "float64" else (a == b).all() ) From dd5c0781495de272108e0f4cdf4caf8e3477f3ad Mon Sep 17 00:00:00 2001 From: drculhane Date: Thu, 14 Nov 2024 09:26:12 -0500 Subject: [PATCH 2/2] Confirmed to cause error with cumsum and cumprod --- src/EfuncMsg.chpl | 85 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/src/EfuncMsg.chpl b/src/EfuncMsg.chpl index 09321700e6..306c19f882 100644 --- a/src/EfuncMsg.chpl +++ b/src/EfuncMsg.chpl @@ -176,11 +176,46 @@ module EfuncMsg // cumsum and cumprod +// The code below causes an anomaly. + +// The currently-commented-out versions of cumsum and cumprod register correctly, if used +// instead of the currently-non-commented versions. The intent is to register both cumsum +// and cumprod for data types int, uint, real, and bool. + +// If the data type is int, uint, or real, the output type matches the input type. +// If the data type is bool, the output is int. + +// However ... + +// The non-commented versions of cumsum and cumprod only register the first case +// of the function, i.e. 
the file src/registry/Commands.chpl includes: + +//proc ark_cumsum_bool_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do +// return ark_reg_cumsum_generic(cmd, msgArgs, st, array_dtype_0=bool, array_nd_0=1); +// registerFunction('cumsum', ark_cumsum_bool_1, 'EfuncMsg', 211); + +// and: + +// proc ark_cumprod_int_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do +// return ark_reg_cumprod_generic(cmd, msgArgs, st, array_dtype_0=int, array_nd_0=1); +// registerFunction('cumprod', ark_cumprod_int_1, 'EfuncMsg', 233); +// +// proc ark_cumprod_uint_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do +// return ark_reg_cumprod_generic(cmd, msgArgs, st, array_dtype_0=uint, array_nd_0=1); +// registerFunction('cumprod', ark_cumprod_uint_1, 'EfuncMsg', 233); +// +// proc ark_cumprod_real_1(cmd: string, msgArgs: borrowed MessageArgs, st: borrowed SymTab): MsgTuple throws do +// return ark_reg_cumprod_generic(cmd, msgArgs, st, array_dtype_0=real, array_nd_0=1); +// registerFunction('cumprod', ark_cumprod_real_1, 'EfuncMsg', 233); + +// In short: cumsum and cumprod should both be registered for int, uint, real and bool. +// But in each case, this is done with two proc definitions that only differ by the types +// in the "where" clause, and only the first instance of the function is registered. proc cumspReturnType(type t) type do return if t == bool then int else t; - +/* @arkouda.registerCommand(name="cumsum") proc cumsum(x : [?d] ?t) : [d] cumspReturnType(t) throws where (t==int || t==real || t==uint || t==bool) { @@ -197,7 +232,52 @@ module EfuncMsg throw new Error ("Over mem limit in cumsum") ; } } - + */ + + @arkouda.registerCommand(name="cumsum") + proc cumsum(x : [?d] ?t) : [d] int throws + where (t==bool) { + if x.rank == 1 { + overMemLimit(numBytes(int) * x.size) ; + var ix = makeDistArray(x.domain, int); // make a copy of bools as ints blah! 
+ ix = x:int ; + return (+scan (ix)); + } else { + throw new Error ("Over mem limit in cumsum") ; + } + } + proc cumsum(x : [?d] ?t) : [d] t throws + where (t==int || t==real || t==uint) { + if x.rank == 1 { + overMemLimit(numBytes(int) * x.size) ; + return (+scan x) ; + } else { + throw new Error ("Over mem limit in cumsum") ; + } + } + + @arkouda.registerCommand(name="cumprod") + proc cumprod(x : [?d] ?t) : [d] t throws + where (t==int || t==real || t==uint) { + if x.rank == 1 { + overMemLimit(numBytes(int) * x.size) ; + return (*scan x) ; + } else { + throw new Error ("Over mem limit in cumprod") ; + } + } + proc cumprod(x : [?d] ?t) : [d] int throws + where (t==bool) { + if x.rank == 1 { + overMemLimit(numBytes(int) * x.size) ; + var ix = makeDistArray(x.domain, int); // make a copy of bools as ints blah! + ix = x:int ; + return (*scan (ix)); + } else { + throw new Error ("Over mem limit in cumprod") ; + } + } +/* @arkouda.registerCommand(name="cumprod") proc cumprod(x : [?d] ?t) : [d] cumspReturnType(t) throws where (t==int || t==real || t==uint || t==bool) { @@ -214,6 +294,7 @@ module EfuncMsg throw new Error ("Over mem limit in cumprod") ; } } + */ // sgn is a special case. It is the only thing that returns int(8).