diff --git a/cuTENSOR/contraction.cu b/cuTENSOR/contraction.cu index 1d338456..6a85d159 100644 --- a/cuTENSOR/contraction.cu +++ b/cuTENSOR/contraction.cu @@ -179,15 +179,15 @@ int main() * cuTENSOR *************************/ - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -195,7 +195,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descB; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descB, nmodeB, extentB.data(), @@ -203,7 +203,7 @@ int main() typeB, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor( &handle, + HANDLE_ERROR(cutensorInitTensorDescriptor( handle, &descC, nmodeC, extentC.data(), @@ -215,19 +215,19 @@ int main() **********************************************/ uint32_t alignmentRequirementA; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, A_d, &descA, &alignmentRequirementA)); uint32_t alignmentRequirementB; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, B_d, &descB, &alignmentRequirementB)); uint32_t alignmentRequirementC; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, C_d, &descC, &alignmentRequirementC)); @@ -237,7 +237,7 @@ int main() *******************************/ cutensorContractionDescriptor_t desc; - HANDLE_ERROR(cutensorInitContractionDescriptor(&handle, + HANDLE_ERROR(cutensorInitContractionDescriptor(handle, &desc, &descA, modeA.data(), alignmentRequirementA, &descB, modeB.data(), 
alignmentRequirementB, @@ -251,7 +251,7 @@ int main() cutensorContractionFind_t find; HANDLE_ERROR(cutensorInitContractionFind( - &handle, &find, + handle, &find, CUTENSOR_ALGO_DEFAULT)); /********************** @@ -259,7 +259,7 @@ int main() **********************/ uint64_t worksize = 0; - HANDLE_ERROR(cutensorContractionGetWorkspaceSize(&handle, + HANDLE_ERROR(cutensorContractionGetWorkspaceSize(handle, &desc, &find, CUTENSOR_WORKSPACE_RECOMMENDED, &worksize)); @@ -279,7 +279,7 @@ int main() **************************/ cutensorContractionPlan_t plan; - HANDLE_ERROR(cutensorInitContractionPlan(&handle, + HANDLE_ERROR(cutensorInitContractionPlan(handle, &plan, &desc, &find, @@ -300,7 +300,7 @@ int main() GPUTimer timer; timer.start(); - err = cutensorContraction(&handle, + err = cutensorContraction(handle, &plan, (void*) &alpha, A_d, B_d, (void*) &beta, C_d, C_d, diff --git a/cuTENSOR/contraction_autotuning.cu b/cuTENSOR/contraction_autotuning.cu index d11ede5a..61fbd641 100644 --- a/cuTENSOR/contraction_autotuning.cu +++ b/cuTENSOR/contraction_autotuning.cu @@ -183,15 +183,15 @@ int main() * cuTENSOR *************************/ - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -199,7 +199,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descB; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descB, nmodeB, extentB.data(), @@ -207,7 +207,7 @@ int main() typeB, CUTENSOR_OP_IDENTITY) ); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descC, nmodeC, extentC.data(), @@ 
-219,19 +219,19 @@ int main() **********************************************/ uint32_t alignmentRequirementA; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, A_d, &descA, &alignmentRequirementA)); uint32_t alignmentRequirementB; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, B_d, &descB, &alignmentRequirementB)); uint32_t alignmentRequirementC; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, C_d, &descC, &alignmentRequirementC)); @@ -241,7 +241,7 @@ int main() *******************************/ cutensorContractionDescriptor_t desc; - HANDLE_ERROR(cutensorInitContractionDescriptor(&handle, + HANDLE_ERROR(cutensorInitContractionDescriptor(handle, &desc, &descA, modeA.data(), alignmentRequirementA, &descB, modeB.data(), alignmentRequirementB, @@ -255,11 +255,11 @@ int main() cutensorContractionFind_t find; HANDLE_ERROR(cutensorInitContractionFind( - &handle, &find, + handle, &find, CUTENSOR_ALGO_DEFAULT)); uint64_t worksize = 0; - HANDLE_ERROR(cutensorContractionGetWorkspaceSize(&handle, + HANDLE_ERROR(cutensorContractionGetWorkspaceSize(handle, &desc, &find, CUTENSOR_WORKSPACE_MAX, &worksize)); @@ -298,7 +298,7 @@ int main() **************************/ cutensorContractionFind_t find; - err = cutensorInitContractionFind(&handle, &find, (cutensorAlgo_t) algo); + err = cutensorInitContractionFind(handle, &find, (cutensorAlgo_t) algo); if (err == CUTENSOR_STATUS_SUCCESS) { @@ -307,7 +307,7 @@ int main() **************************/ cutensorContractionPlan_t plan; - err = cutensorInitContractionPlan(&handle, + err = cutensorInitContractionPlan(handle, &plan, &desc, &find, @@ -319,7 +319,7 @@ int main() GPUTimer timer; timer.start(); - err = cutensorContraction(&handle, + err = cutensorContraction(handle, &plan, (void*) &alpha, A_d, B_d, (void*) &beta, C_d, C_d, diff --git 
a/cuTENSOR/contraction_plan_cache.cu b/cuTENSOR/contraction_plan_cache.cu index 91202a18..a6e35e22 100644 --- a/cuTENSOR/contraction_plan_cache.cu +++ b/cuTENSOR/contraction_plan_cache.cu @@ -179,8 +179,8 @@ int main() * cuTENSOR *************************/ - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Setup planCache @@ -189,11 +189,11 @@ int main() size_t sizeCache = numCachelines * sizeof(cutensorPlanCacheline_t); printf("Allocating: %.2f kB for the cache\n", sizeCache / 1000.); cutensorPlanCacheline_t* cachelines = (cutensorPlanCacheline_t*) malloc(sizeCache); - HANDLE_ERROR( cutensorHandleAttachPlanCachelines(&handle, cachelines, numCachelines) ); + HANDLE_ERROR( cutensorHandleAttachPlanCachelines(handle, cachelines, numCachelines) ); const char cacheFilename[] = "./cache.bin"; uint32_t numCachelinesRead = 0; - cutensorStatus_t status = cutensorHandleReadCacheFromFile(&handle, cacheFilename, &numCachelinesRead); + cutensorStatus_t status = cutensorHandleReadCacheFromFile(handle, cacheFilename, &numCachelinesRead); if (status == CUTENSOR_STATUS_SUCCESS) { printf("%d cachelines have been successfully read from file (%s).\n", numCachelinesRead, cacheFilename); @@ -212,7 +212,7 @@ int main() **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -220,7 +220,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descB; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descB, nmodeB, extentB.data(), @@ -228,7 +228,7 @@ int main() typeB, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor( &handle, + HANDLE_ERROR(cutensorInitTensorDescriptor( handle, &descC, nmodeC, extentC.data(), @@ -240,19 
+240,19 @@ int main() **********************************************/ uint32_t alignmentRequirementA; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, A_d, &descA, &alignmentRequirementA)); uint32_t alignmentRequirementB; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, B_d, &descB, &alignmentRequirementB)); uint32_t alignmentRequirementC; - HANDLE_ERROR(cutensorGetAlignmentRequirement(&handle, + HANDLE_ERROR(cutensorGetAlignmentRequirement(handle, C_d, &descC, &alignmentRequirementC)); @@ -262,7 +262,7 @@ int main() *******************************/ cutensorContractionDescriptor_t desc; - HANDLE_ERROR(cutensorInitContractionDescriptor(&handle, + HANDLE_ERROR(cutensorInitContractionDescriptor(handle, &desc, &descA, modeA.data(), alignmentRequirementA, &descB, modeB.data(), alignmentRequirementB, @@ -276,12 +276,12 @@ int main() cutensorContractionFind_t find; HANDLE_ERROR(cutensorInitContractionFind( - &handle, &find, + handle, &find, CUTENSOR_ALGO_DEFAULT)); const cutensorCacheMode_t cacheMode = CUTENSOR_CACHE_MODE_PEDANTIC; HANDLE_ERROR(cutensorContractionFindSetAttribute( - &handle, + handle, &find, CUTENSOR_CONTRACTION_FIND_CACHE_MODE, &cacheMode, @@ -289,7 +289,7 @@ int main() const cutensorAutotuneMode_t autotuneMode = CUTENSOR_AUTOTUNE_INCREMENTAL; HANDLE_ERROR(cutensorContractionFindSetAttribute( - &handle, + handle, &find, CUTENSOR_CONTRACTION_FIND_AUTOTUNE_MODE, &autotuneMode , @@ -297,7 +297,7 @@ int main() const uint32_t incCount = 4; HANDLE_ERROR(cutensorContractionFindSetAttribute( - &handle, + handle, &find, CUTENSOR_CONTRACTION_FIND_INCREMENTAL_COUNT, &incCount, @@ -308,7 +308,7 @@ int main() **********************/ uint64_t worksize = 0; - HANDLE_ERROR(cutensorContractionGetWorkspaceSize(&handle, + HANDLE_ERROR(cutensorContractionGetWorkspaceSize(handle, &desc, &find, CUTENSOR_WORKSPACE_MAX, &worksize)); // TODO @@ -342,20 
+342,20 @@ int main() const cutensorCacheMode_t cacheMode = CUTENSOR_CACHE_MODE_NONE; HANDLE_ERROR(cutensorContractionFindSetAttribute( - &handle, + handle, &find_copy, CUTENSOR_CONTRACTION_FIND_CACHE_MODE, &cacheMode, sizeof(cutensorCacheMode_t))); // To take advantage of the incremental-autotuning (via the cache), it's important to re-initialize the plan - HANDLE_ERROR(cutensorInitContractionPlan(&handle, + HANDLE_ERROR(cutensorInitContractionPlan(handle, &plan, &desc, &find_copy, worksize)); - HANDLE_ERROR(cutensorContraction(&handle, + HANDLE_ERROR(cutensorContraction(handle, &plan, (void*) &alpha, A_d, B_d, (void*) &beta, C_d, C_d, @@ -375,13 +375,13 @@ int main() timer.start(); // To take advantage of the incremental-autotuning (via the cache), it's important to re-initialize the plan - HANDLE_ERROR(cutensorInitContractionPlan(&handle, + HANDLE_ERROR(cutensorInitContractionPlan(handle, &plan, &desc, &find, worksize)); - cutensorStatus_t err = cutensorContraction(&handle, + cutensorStatus_t err = cutensorContraction(handle, &plan, (void*) &alpha, A_d, B_d, (void*) &beta, C_d, C_d, @@ -409,11 +409,11 @@ int main() /* * Optional: Write cache to disk */ - HANDLE_ERROR( cutensorHandleWriteCacheToFile(&handle, cacheFilename) ); + HANDLE_ERROR( cutensorHandleWriteCacheToFile(handle, cacheFilename) ); printf("Cache has been successfully written to file (%s).\n", cacheFilename); // Detach cache and free-up resources - HANDLE_ERROR( cutensorHandleDetachPlanCachelines(&handle) ); + HANDLE_ERROR( cutensorHandleDetachPlanCachelines(handle) ); if (A) free(A); if (B) free(B); diff --git a/cuTENSOR/contraction_simple.cu b/cuTENSOR/contraction_simple.cu index 49c63b35..30f150d4 100644 --- a/cuTENSOR/contraction_simple.cu +++ b/cuTENSOR/contraction_simple.cu @@ -242,15 +242,15 @@ int main() * cuTENSOR *************************/ - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); 
/********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -258,7 +258,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descB; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descB, nmodeB, extentB.data(), @@ -266,14 +266,14 @@ int main() typeB, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descC, nmodeC, extentC.data(), NULL /* stride */, typeC, CUTENSOR_OP_IDENTITY)); - HANDLE_ERROR(cutensorContractionSimple(&handle, + HANDLE_ERROR(cutensorContractionSimple(handle, (void*)&alpha, A_d, &descA, modeA.data(), B_d, &descB, modeB.data(), (void*)&beta, C_d, &descC, modeC.data(), diff --git a/cuTENSOR/einsum.cu b/cuTENSOR/einsum.cu index a9621c68..c980a63e 100644 --- a/cuTENSOR/einsum.cu +++ b/cuTENSOR/einsum.cu @@ -415,8 +415,8 @@ void einsum(cutensorHandle_t *handle, int main() { - cutensorHandle_t handle; - cutensorInit(&handle); + cutensorHandle_t *handle; + cutensorCreate(&handle); /********************** * Setup planCache (optional) @@ -424,16 +424,16 @@ int main() constexpr int32_t numCachelines = 1024; size_t sizeCache = numCachelines * sizeof(cutensorPlanCacheline_t); cutensorPlanCacheline_t* cachelines = (cutensorPlanCacheline_t*) malloc(sizeCache); - HANDLE_ERROR( cutensorHandleAttachPlanCachelines(&handle, cachelines, numCachelines) ); + HANDLE_ERROR( cutensorHandleAttachPlanCachelines(handle, cachelines, numCachelines) ); - einsum(&handle, {2, 4, 5}, {4, 8, 7}, "ijn,jmk->inkm"); // contraction (explict) - einsum(&handle, {2, 4, 5}, {4, 8, 7}, "ijn,jmk"); // contraction (implicit) - einsum(&handle, {2, 4, 5}, {}, "nij"); // permutation (implicit) - einsum(&handle, {2, 4, 
5}, {}, "nij->ijn"); // permutation (same as previous example, but explicit) - einsum(&handle, {2, 4, 5}, {}, "nij->ji"); // reduction + einsum(handle, {2, 4, 5}, {4, 8, 7}, "ijn,jmk->inkm"); // contraction (explicit) + einsum(handle, {2, 4, 5}, {4, 8, 7}, "ijn,jmk"); // contraction (implicit) + einsum(handle, {2, 4, 5}, {}, "nij"); // permutation (implicit) + einsum(handle, {2, 4, 5}, {}, "nij->ijn"); // permutation (same as previous example, but explicit) + einsum(handle, {2, 4, 5}, {}, "nij->ji"); // reduction // Detach cache and free-up resources - HANDLE_ERROR( cutensorHandleDetachPlanCachelines(&handle) ); + HANDLE_ERROR( cutensorHandleDetachPlanCachelines(handle) ); if (cachelines) free (cachelines); return 0; diff --git a/cuTENSOR/elementwise_binary.cu b/cuTENSOR/elementwise_binary.cu index 042d0270..ebc3891a 100644 --- a/cuTENSOR/elementwise_binary.cu +++ b/cuTENSOR/elementwise_binary.cu @@ -175,14 +175,14 @@ int main() * cuTENSOR *************************/ cutensorStatus_t err; - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor( &handle, + HANDLE_ERROR(cutensorInitTensorDescriptor( handle, &descA, nmodeA, extentA.data(), @@ -190,7 +190,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor( &handle, + HANDLE_ERROR(cutensorInitTensorDescriptor( handle, &descC, nmodeC, extentC.data(), @@ -203,7 +203,7 @@ int main() HANDLE_CUDA_ERROR(cudaMemcpy2DAsync(C_d, sizeC, C, sizeC, sizeC, 1, cudaMemcpyDefault, 0)); HANDLE_CUDA_ERROR(cudaDeviceSynchronize()); timer.start(); - err = cutensorElementwiseBinary(&handle, + err = cutensorElementwiseBinary(handle, (void*)&alpha, A_d, &descA, modeA.data(), (void*)&gamma, C_d, &descC, modeC.data(), C_d, &descC, modeC.data(), 
diff --git a/cuTENSOR/elementwise_permute.cu b/cuTENSOR/elementwise_permute.cu index d9a2b7ba..23327cab 100644 --- a/cuTENSOR/elementwise_permute.cu +++ b/cuTENSOR/elementwise_permute.cu @@ -156,15 +156,15 @@ int main() *************************/ cutensorStatus_t err; - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -172,7 +172,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descC, nmodeC, extentC.data(), @@ -186,7 +186,7 @@ int main() timer.start(); const floatTypeCompute one = 1.0f; - err = cutensorPermutation(&handle, + err = cutensorPermutation(handle, &one, A_d, &descA, modeA.data(), C_d, &descC, modeC.data(), typeCompute, 0 /* stream */); diff --git a/cuTENSOR/elementwise_trinary.cu b/cuTENSOR/elementwise_trinary.cu index 638a201d..bcdc653b 100644 --- a/cuTENSOR/elementwise_trinary.cu +++ b/cuTENSOR/elementwise_trinary.cu @@ -193,15 +193,15 @@ int main() *************************/ cutensorStatus_t err; - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -209,7 +209,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descB; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descB, nmodeB, 
extentB.data(), @@ -217,7 +217,7 @@ int main() typeB, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descC, nmodeC, extentC.data(), @@ -231,7 +231,7 @@ int main() { HANDLE_CUDA_ERROR(cudaMemcpy2DAsync(D_d, sizeC, C, sizeC, sizeC, 1, cudaMemcpyDefault, 0)); timer.start(); - err = cutensorElementwiseTrinary(&handle, + err = cutensorElementwiseTrinary(handle, (void*)&alpha, A_d, &descA, modeA.data(), (void*)&beta , B_d, &descB, modeB.data(), (void*)&gamma, C_d, &descC, modeC.data(), diff --git a/cuTENSOR/reduction.cu b/cuTENSOR/reduction.cu index e129b2ee..1c4d735d 100644 --- a/cuTENSOR/reduction.cu +++ b/cuTENSOR/reduction.cu @@ -160,15 +160,15 @@ int main() * cuTENSOR *************************/ - cutensorHandle_t handle; - HANDLE_ERROR(cutensorInit(&handle)); + cutensorHandle_t *handle; + HANDLE_ERROR(cutensorCreate(&handle)); /********************** * Create Tensor Descriptors **********************/ cutensorTensorDescriptor_t descA; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descA, nmodeA, extentA.data(), @@ -176,7 +176,7 @@ int main() typeA, CUTENSOR_OP_IDENTITY)); cutensorTensorDescriptor_t descC; - HANDLE_ERROR(cutensorInitTensorDescriptor(&handle, + HANDLE_ERROR(cutensorInitTensorDescriptor(handle, &descC, nmodeC, extentC.data(), @@ -190,7 +190,7 @@ int main() **********************/ uint64_t worksize = 0; - HANDLE_ERROR(cutensorReductionGetWorkspaceSize(&handle, + HANDLE_ERROR(cutensorReductionGetWorkspaceSize(handle, A_d, &descA, modeA.data(), C_d, &descC, modeC.data(), C_d, &descC, modeC.data(), @@ -220,7 +220,7 @@ int main() GPUTimer timer; timer.start(); - err = cutensorReduction(&handle, + err = cutensorReduction(handle, (const void*)&alpha, A_d, &descA, modeA.data(), (const void*)&beta, C_d, &descC, modeC.data(), C_d, &descC, modeC.data(),