diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml/src/ggml-sycl/common.cpp
index 9069c47865f68..05fd5ef46c76a 100644
--- a/ggml/src/ggml-sycl/common.cpp
+++ b/ggml/src/ggml-sycl/common.cpp
@@ -66,41 +66,6 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
     return sycl_down_blk_size;
 }
 
-void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                          const ggml_tensor *src1, ggml_tensor *dst,
-                          const ggml_sycl_op_flatten_t op) try {
-
-    const bool use_src1 = src1 != nullptr;
-    if(use_src1)
-        GGML_ASSERT(strcmp(src1->buffer->buft->iface.get_name(src1->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-    GGML_ASSERT(strcmp(dst->buffer->buft->iface.get_name(dst->buffer->buft), GGML_SYCL_NAME "_Split") != 0);
-
-    // dd = data device
-    float * src0_ddf = (float *) src0->data;
-    float * src1_ddf = use_src1 ? (float *) src1->data : nullptr;
-    float *  dst_ddf = (float *) dst->data;
-
-    ggml_sycl_pool_alloc<float> src0_f(ctx.pool());
-    ggml_sycl_pool_alloc<float> src1_f(ctx.pool());
-    ggml_sycl_pool_alloc<float>  dst_f(ctx.pool());
-
-    ggml_sycl_set_device(ctx.device);
-    queue_ptr main_stream = ctx.stream();
-    // GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
-    //     ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);
-
-    // do the computation
-    op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);
-    // print_ggml_tensor("tensor", dst);
-}
-catch (sycl::exception const &exc) {
-
-    std::cerr << exc.what() << "Exception caught at file:" << __FILE__
-              << ", line:" << __LINE__ << std::endl;
-    std::exit(1);
-}
-
-
 void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams) {
     for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
         for (int64_t is = 0; is < GGML_SYCL_MAX_STREAMS; ++is) {
diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
index 27b447ce30d18..3e1ceeaa49486 100644
--- a/ggml/src/ggml-sycl/common.hpp
+++ b/ggml/src/ggml-sycl/common.hpp
@@ -494,12 +494,6 @@ static __dpct_inline__ Tp* get_pointer(sycl::local_accessor<Tp, dim> acc) {
 
 int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size);
 
-typedef void (*ggml_sycl_op_flatten_t)(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                       const ggml_tensor *src1,
-                                       ggml_tensor *dst, const float *src0_dd,
-                                       const float *src1_dd, float *dst_dd,
-                                       const queue_ptr &main_stream);
-
 template <float (*bin_op)(const float, const float), typename src0_t, typename src1_t, typename dst_t>
 static void k_bin_bcast(const src0_t * src0, const src1_t * src1, dst_t * dst,
         int ne0, int ne1, int ne2, int ne3,
@@ -757,24 +751,22 @@ struct bin_bcast_sycl {
 
 template <class op>
 inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                   const ggml_tensor *src1, ggml_tensor *dst,
-                                   const float *src0_dd, const float *src1_dd,
-                                   float *dst_dd,
-                                   const queue_ptr &main_stream) {
+                                   const ggml_tensor *src1, ggml_tensor *dst) {
+    dpct::queue_ptr main_stream = ctx.stream();
 
     if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
-        op()(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream);
+        op()(ctx, src0, src1, dst, (const float *)src0->data, (const float *)src1->data, (float *)dst->data, main_stream);
     } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
-        op()(ctx, src0, src1, dst, (const sycl::half *)src0_dd, src1_dd,
-             (sycl::half *)dst_dd, main_stream);
+        op()(ctx, src0, src1, dst, (const sycl::half *)src0->data, (const float *)src1->data,
+             (sycl::half *)dst->data, main_stream);
     } else if (src0->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F32) {
-        op()(ctx, src0, src1, dst, (const sycl::half *)src0_dd, src1_dd, dst_dd,
+        op()(ctx, src0, src1, dst, (const sycl::half *)src0->data, (const float *)src1->data, (float *)dst->data,
             main_stream);
     } else if (src0->type == GGML_TYPE_I32 && dst->type == GGML_TYPE_I32) {
-        op()(ctx, src0, src1, dst, (const int32_t *)src0_dd, (const int32_t *)src1_dd, (int32_t *)dst_dd,
+        op()(ctx, src0, src1, dst, (const int32_t *)src0->data, (const int32_t *)src1->data, (int32_t *)dst->data,
             main_stream);
     } else if (src0->type == GGML_TYPE_I16 && dst->type == GGML_TYPE_I16) {
-        op()(ctx, src0, src1, dst, (const int16_t *)src0_dd, (const int16_t *)src1_dd, (int16_t *)dst_dd,
+        op()(ctx, src0, src1, dst, (const int16_t *)src0->data, (const int16_t *)src1->data, (int16_t *)dst->data,
             main_stream);
     } else {
         fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__,
@@ -784,8 +776,4 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
 }
 
 bool gpu_has_xmx(sycl::device &dev);
-
-void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                          const ggml_tensor *src1, ggml_tensor *dst,
-                          const ggml_sycl_op_flatten_t op);
 #endif // GGML_SYCL_COMMON_HPP
diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp
index 1e12cb220e4c1..0423305bb4016 100644
--- a/ggml/src/ggml-sycl/element_wise.cpp
+++ b/ggml/src/ggml-sycl/element_wise.cpp
@@ -509,497 +509,409 @@ static void pad_f32_sycl(const float *x, float *dst, const int ne00,
         });
 }
 
-inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-                              ggml_tensor *dst, const float *src0_dd,
-                              const float *src1_dd, float *dst_dd,
-                              const queue_ptr &main_stream) {
+inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
+    dpct::queue_ptr main_stream = ctx.stream();
+    SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
 
-    silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+    silu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
-                              ggml_tensor *dst, const float *src0_dd,
-                              const float *src1_dd, float *dst_dd,
-                              const queue_ptr &main_stream) {
+inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
 
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
+    GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
+    dpct::queue_ptr main_stream = ctx.stream();
+    SYCL_CHECK(ggml_sycl_set_device(ctx.device));
+    const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
+    float * dst_dd = static_cast<float *>(dst->data);
 
-    gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream);
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
+    gelu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream);
 }
 
-inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                    const ggml_tensor *src1, ggml_tensor *dst,
-                                    const float *src0_dd, const float *src1_dd,
-                                    float *dst_dd,
-                                    const queue_ptr &main_stream) {
-
-
GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); +inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + gelu_quick_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); - tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); + tanh_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + hardsigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float 
*src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + hardswish_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + exp_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - log_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + log_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + sigmoid_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float 
*dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + sqrt_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + sin_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + cos_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - step_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + step_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { - 
GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_F32); +inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + neg_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float negative_slope; memcpy(&negative_slope, dst->op_params, sizeof(float)); - leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), negative_slope, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + leaky_relu_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), negative_slope, main_stream); } -inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(src0), main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + sqr_f32_sycl(src0_dd, dst_dd, ggml_nelements(dst->src[0]), main_stream); } -inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - const float sf0 = (float)dst->ne[0]/src0->ne[0]; - const float sf1 = (float)dst->ne[1]/src0->ne[1]; - const float sf2 = (float)dst->ne[2]/src0->ne[2]; - const float sf3 = (float)dst->ne[3]/src0->ne[3]; + const float sf0 = (float)dst->ne[0]/dst->src[0]->ne[0]; + const float sf1 = (float)dst->ne[1]/dst->src[0]->ne[1]; + 
const float sf2 = (float)dst->ne[2]/dst->src[0]->ne[2]; + const float sf3 = (float)dst->ne[3]/dst->src[0]->ne[3]; - upscale_f32_sycl(src0_dd, dst_dd, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], + upscale_f32_sycl(src0_dd, dst_dd, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], dst->src[0]->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors + GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); pad_f32_sycl(src0_dd, dst_dd, - src0->ne[0], src0->ne[1], src0->ne[2], + dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2], dst->ne[0], dst->ne[1], dst->ne[2], main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT(src1->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); GGML_ASSERT(dst->ne[3] == 1); // just 3D tensors supported + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + const float * src1_dd = static_cast(dst->src[1]->data); + float * dst_dd = static_cast(dst->data); int nb1 = dst->op_params[0] / 4; // 4 bytes of float32 int nb2 = dst->op_params[1] / 4; // 4 bytes of float32 // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused int offset = dst->op_params[3] / 4; // offset in bytes - acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), src1->ne[0], src1->ne[1], src1->ne[2], nb1, nb2, offset, main_stream); - - GGML_UNUSED(dst); - GGML_UNUSED(ctx); + acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream); } -inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_add(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - ggml_sycl_op_bin_bcast>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream); + ggml_sycl_op_bin_bcast>(ctx, dst->src[0], dst->src[1], dst); } -inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - 
ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_sub(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - ggml_sycl_op_bin_bcast>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream); + ggml_sycl_op_bin_bcast>(ctx, dst->src[0], dst->src[1], dst); } -inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_mul(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - ggml_sycl_op_bin_bcast>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream); + ggml_sycl_op_bin_bcast>(ctx, dst->src[0], dst->src[1], dst); } -inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - ggml_sycl_op_bin_bcast>(ctx, src0, src1, dst, src0_dd, src1_dd, dst_dd, main_stream); + ggml_sycl_op_bin_bcast>(ctx, dst->src[0], dst->src[1], dst); } void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt); + ggml_sycl_op_sqrt(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin); + ggml_sycl_op_sin(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos); + ggml_sycl_op_cos(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc); + ggml_sycl_op_acc(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu); + ggml_sycl_op_gelu(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu); + ggml_sycl_op_silu(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick); + ggml_sycl_op_gelu_quick(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh); + ggml_sycl_op_tanh(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call 
%s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu); + ggml_sycl_op_relu(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid); + ggml_sycl_op_sigmoid(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid); + ggml_sycl_op_hardsigmoid(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish); + ggml_sycl_op_hardswish(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp); + ggml_sycl_op_exp(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log); + ggml_sycl_op_log(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg); + ggml_sycl_op_neg(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step); + ggml_sycl_op_step(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu); + ggml_sycl_op_leaky_relu(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr); + ggml_sycl_op_sqr(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale); + ggml_sycl_op_upscale(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad); + ggml_sycl_op_pad(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } @@ -1007,24 +919,24 @@ void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add); + ggml_sycl_op_add(ctx, dst); GGML_SYCL_DEBUG("call %s 
done\n", __func__); } void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub); + ggml_sycl_op_sub(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul); + ggml_sycl_op_mul(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div); + ggml_sycl_op_div(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } diff --git a/ggml/src/ggml-sycl/getrows.cpp b/ggml/src/ggml-sycl/getrows.cpp index b9cf8767cbab2..64665be464762 100644 --- a/ggml/src/ggml-sycl/getrows.cpp +++ b/ggml/src/ggml-sycl/getrows.cpp @@ -257,50 +257,54 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens GGML_UNUSED(ctx); } -void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_d, const float *src1_d, - float *dst_d, const queue_ptr &stream) { +void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src1->type == GGML_TYPE_I32); + GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - GGML_ASSERT(src0->nb[0] == ggml_type_size(src0->type)); - GGML_ASSERT(src1->nb[0] == ggml_type_size(src1->type)); + GGML_ASSERT(dst->src[0]->nb[0] == ggml_type_size(dst->src[0]->type)); + GGML_ASSERT(dst->src[1]->nb[0] == ggml_type_size(dst->src[1]->type)); GGML_ASSERT(dst->nb[0] == ggml_type_size(dst->type)); - const int32_t * src1_i32 = (const int32_t *) src1_d; - - switch (src0->type) { + const int32_t * src1_i32 = (const int32_t *) dst->src[1]->data; + /* TODO: Refactor and remove duplicates */ + switch (dst->src[0]->type) { case GGML_TYPE_F16: - get_rows_sycl_float(ctx, src0, src1, dst, (const sycl::half *)src0_d, - src1_i32, dst_d, stream); + get_rows_sycl_float(ctx, dst->src[0], dst->src[1], dst, (const sycl::half *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_F32: - get_rows_sycl_float(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl_float(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_Q4_0: if (ctx.opt_feature.reorder && dst->op == GGML_OP_MUL_MAT) { - get_rows_sycl_reorder(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl_reorder(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); } else { - get_rows_sycl(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); } break; case GGML_TYPE_Q4_1: - get_rows_sycl(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_Q5_0: - get_rows_sycl(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float 
*)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_Q5_1: - get_rows_sycl(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_Q8_0: - get_rows_sycl(ctx, src0, src1, dst, src0_d, src1_i32, dst_d, stream); + get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; default: // TODO: k-quants - GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(src0->type)); + GGML_LOG_ERROR("%s: unsupported type: %s\n", __func__, ggml_type_name(dst->src[0]->type)); GGML_ABORT("fatal error"); } } diff --git a/ggml/src/ggml-sycl/getrows.hpp b/ggml/src/ggml-sycl/getrows.hpp index cdbe6c2f41bc3..1c560cd9f8941 100644 --- a/ggml/src/ggml-sycl/getrows.hpp +++ b/ggml/src/ggml-sycl/getrows.hpp @@ -15,9 +15,6 @@ #include "common.hpp" -void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_d, const float *src1_d, - float *dst_d, const queue_ptr &stream); +void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst); #endif // GGML_SYCL_GETROWS_HPP diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 39d53da33035d..ab8efba8165e9 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -1988,16 +1988,8 @@ catch (sycl::exception const &exc) { std::exit(1); } -static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_d, const float *src1_d, - float *dst_d, - const queue_ptr &main_stream) { - - ggml_sycl_op_bin_bcast>(ctx, dst, src0, dst, nullptr, src0_d, dst_d, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(src1_d); +static void ggml_sycl_op_repeat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + ggml_sycl_op_bin_bcast>(ctx, dst, dst->src[0], dst); } @@ -2132,13 +2124,14 @@ catch (sycl::exception const &exc) { std::exit(1); } -static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, const queue_ptr &main_stream) { +static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); const int32_t * opts = (const int32_t *)dst->op_params; enum ggml_op_pool op = static_cast(opts[0]); @@ -2149,8 +2142,8 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens const int p0 = opts[5]; const int p1 = opts[6]; - const int64_t IH = src0->ne[1]; - const int64_t IW = src0->ne[0]; + const int64_t IH = dst->src[0]->ne[1]; + const int64_t IW = dst->src[0]->ne[0]; const int64_t N = dst->ne[3]; const int64_t OC = dst->ne[2]; @@ -2169,163 +2162,125 @@ static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, const ggml_tens parallel_elements, src0_dd, dst_dd, op, item_ct1); }); - - GGML_UNUSED(src1); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline 
void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); +inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - const int64_t ne = ggml_nelements(src0); + const int64_t ne = ggml_nelements(dst->src[0]); sum_rows_f32_sycl(src0_dd, dst_dd, ne, 1, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - const int64_t ncols = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); + const int64_t ncols = dst->src[0]->ne[0]; + const int64_t nrows = ggml_nrows(dst->src[0]); sum_rows_f32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_I32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + int32_t * dst_dd = static_cast(dst->data); - GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT( dst->type == GGML_TYPE_I32); - const int64_t ncols = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); + const int64_t ncols = dst->src[0]->ne[0]; + const int64_t nrows = ggml_nrows(dst->src[0]); enum ggml_sort_order order = (enum ggml_sort_order) dst->op_params[0]; - argsort_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, order, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + argsort_f32_i32_sycl(src0_dd, (int *) dst_dd, ncols, nrows, order, main_stream); } -inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, - float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_argmax(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_I32); - const int64_t ncols = src0->ne[0]; - const 
int64_t nrows = ggml_nrows(src0); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + int32_t * dst_dd = static_cast(dst->data); - argmax_f32_i32_sycl(src0_dd, (int *)dst_dd, ncols, nrows, main_stream); + const int64_t ncols = dst->src[0]->ne[0]; + const int64_t nrows = ggml_nrows(dst->src[0]); - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); + argmax_f32_i32_sycl(src0_dd, dst_dd, ncols, nrows, main_stream); } -inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, - const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_diag_mask_inf(ggml_backend_sycl_context & ctx,ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int nrows0 = ggml_nrows(src0); + const int64_t ne00 = dst->src[0]->ne[0]; + const int64_t ne01 = dst->src[0]->ne[1]; + const int nrows0 = ggml_nrows(dst->src[0]); const int n_past = ((int32_t *) dst->op_params)[0]; diag_mask_inf_f32_sycl(src0_dd, dst_dd, ne00, nrows0, ne01, n_past, main_stream); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float scale; memcpy(&scale, dst->op_params, sizeof(float)); - scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(src0), main_stream); + scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream); /* DPCT1010:87: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 
*/ SYCL_CHECK(0); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } -inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, - const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float min; float max; memcpy(&min, dst->op_params, sizeof(float)); memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); - clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(src0), main_stream); + clamp_f32_sycl(src0_dd, dst_dd, min, max, ggml_nelements(dst->src[0]), ctx.stream()); /* DPCT1010:88: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. */ SYCL_CHECK(0); - - GGML_UNUSED(src1); - GGML_UNUSED(dst); - GGML_UNUSED(src1_dd); - GGML_UNUSED(ctx); } static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) { @@ -2695,37 +2650,37 @@ catch (sycl::exception const &exc) { static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat); + ggml_sycl_op_repeat(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows); + ggml_sycl_op_get_rows(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm); + ggml_sycl_op_norm(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm); + ggml_sycl_op_rms_norm(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_l2_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_l2_norm); + ggml_sycl_op_l2_norm(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_SYCL_DEBUG("call %s\n", __func__); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm); + ggml_sycl_op_group_norm(ctx, dst); GGML_SYCL_DEBUG("call %s done\n", __func__); } @@ -3269,48 +3224,48 @@ catch (sycl::exception const &exc) { } static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale); + ggml_sycl_op_scale(ctx, dst); } static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp); + ggml_sycl_op_clamp(ctx, dst); } static void 
ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf); + ggml_sycl_op_diag_mask_inf(ctx, dst); } static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope); + ggml_sycl_op_rope(ctx, dst); } static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d); + ggml_sycl_op_pool2d(ctx, dst); } static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col); + ggml_sycl_op_im2col(ctx, dst); } static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(dst->src[0])); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum); + ggml_sycl_op_sum(ctx, dst); } static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(dst->src[0])); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows); + ggml_sycl_op_sum_rows(ctx, dst); } static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(dst->src[0])); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort); + ggml_sycl_op_argsort(ctx, dst); } static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { GGML_ASSERT(ggml_is_contiguous(dst->src[0])); - ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax); + ggml_sycl_op_argmax(ctx, dst); } @@ -3335,7 +3290,7 @@ catch (sycl::exception const &exc) { std::exit(1); } -static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) { +static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) try { if (!g_sycl_loaded) return false; if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) { @@ -3528,6 +3483,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg } return true; +} catch (sycl::exception & e) { + std::cerr << e.what() << "Exception caught at file:" << __FILE__ << ", line:" << __LINE__ << std::endl; + std::exit(1); } GGML_API void ggml_backend_sycl_get_device_description(int device, char *description, diff --git a/ggml/src/ggml-sycl/im2col.cpp b/ggml/src/ggml-sycl/im2col.cpp index 6146a99edbe77..009b42035d026 100644 --- a/ggml/src/ggml-sycl/im2col.cpp +++ b/ggml/src/ggml-sycl/im2col.cpp @@ -82,10 +82,9 @@ static void im2col_sycl( } } -void ggml_sycl_op_im2col( - ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream) { +void ggml_sycl_op_im2col(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src1->type == GGML_TYPE_F32); @@ -115,12 +114,8 @@ void ggml_sycl_op_im2col( const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32 if (dst->type == GGML_TYPE_F16) { - 
im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + im2col_sycl((const float *) src1->data, (sycl::half *)dst->data, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, ctx.stream()); } else { - im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, main_stream); + im2col_sycl((const float *) src1->data, (float *)dst->data, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, ctx.stream()); } - - GGML_UNUSED(src0); - GGML_UNUSED(src0_dd); - GGML_UNUSED(ctx); } diff --git a/ggml/src/ggml-sycl/im2col.hpp b/ggml/src/ggml-sycl/im2col.hpp index 7db144fbbe524..dbbb248ddb4fc 100644 --- a/ggml/src/ggml-sycl/im2col.hpp +++ b/ggml/src/ggml-sycl/im2col.hpp @@ -16,8 +16,6 @@ #include "common.hpp" void ggml_sycl_op_im2col( - ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const float *src0_dd, const float *src1_dd, float *dst_dd, - const queue_ptr &main_stream); + ggml_backend_sycl_context & ctx, ggml_tensor *dst); #endif // GGML_SYCL_IM2COL_HPP diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp index d9678da8f042e..1d2cf5bc8e680 100644 --- a/ggml/src/ggml-sycl/norm.cpp +++ b/ggml/src/ggml-sycl/norm.cpp @@ -397,90 +397,78 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols, } } -void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1, - ggml_tensor* dst, const float* src0_dd, - const float* src1_dd, float* dst_dd, - const queue_ptr& main_stream) { +void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - const int64_t ne00 = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); + const int64_t ne00 = dst->src[0]->ne[0]; + const int64_t nrows = ggml_nrows(dst->src[0]); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float eps; memcpy(&eps, dst->op_params, sizeof(float)); norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device); - - (void)src1; - (void)dst; - (void)src1_dd; } -void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst, - const float* src0_dd, const float* src1_dd, - float* dst_dd, - const queue_ptr& main_stream) { +void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); int num_groups = dst->op_params[0]; + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float eps; memcpy(&eps, dst->op_params + 1, sizeof(float)); - int group_size = src0->ne[0] * src0->ne[1] * ((src0->ne[2] + num_groups - 1) / num_groups); - group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, src0->ne[0] * src0->ne[1] * src0->ne[2], main_stream, ctx.device); - - (void)src1; - (void)dst; - (void)src1_dd; - GGML_UNUSED(ctx); + int group_size = 
dst->src[0]->ne[0] * dst->src[0]->ne[1] * ((dst->src[0]->ne[2] + num_groups - 1) / num_groups); + group_norm_f32_sycl(src0_dd, dst_dd, num_groups, eps, group_size, dst->src[0]->ne[0] * dst->src[0]->ne[1] * dst->src[0]->ne[2], main_stream, ctx.device); } -void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst, - const float* src0_dd, const float* src1_dd, - float* dst_dd, - const queue_ptr& main_stream) { +void ggml_sycl_op_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - const int64_t ne00 = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); + const int64_t ne00 = dst->src[0]->ne[0]; + const int64_t nrows = ggml_nrows(dst->src[0]); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float eps; memcpy(&eps, dst->op_params, sizeof(float)); rms_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device); - - (void)src1; - (void)dst; - (void)src1_dd; } -void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst, - const float* src0_dd, const float* src1_dd, - float* dst_dd, - const queue_ptr& main_stream) { +void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - const int64_t ne00 = src0->ne[0]; - const int64_t nrows = ggml_nrows(src0); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + + const int64_t ne00 = dst->src[0]->ne[0]; + const int64_t nrows = ggml_nrows(dst->src[0]); + const float * src0_dd = static_cast(dst->src[0]->data); + float * dst_dd = static_cast(dst->data); float eps; memcpy(&eps, dst->op_params, sizeof(float)); l2_norm_f32_sycl(src0_dd, dst_dd, ne00, nrows, eps, main_stream, ctx.device); - (void)src1; - (void)dst; - (void)src1_dd; } diff --git a/ggml/src/ggml-sycl/norm.hpp b/ggml/src/ggml-sycl/norm.hpp index 11e91680cc496..612cd67cf9183 100644 --- a/ggml/src/ggml-sycl/norm.hpp +++ b/ggml/src/ggml-sycl/norm.hpp @@ -15,27 +15,12 @@ #include "common.hpp" -void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, const ggml_tensor* src1, - ggml_tensor* dst, const float* src0_dd, - const float* src1_dd, float* dst_dd, - const queue_ptr& main_stream); - -void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst, - const float* src0_dd, const float* src1_dd, - float* dst_dd, - const queue_ptr& main_stream); - -void ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst, - const float* src0_dd, const float* src1_dd, - float* dst_dd, - const queue_ptr& main_stream); - -void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, const ggml_tensor* src0, - const ggml_tensor* src1, ggml_tensor* dst, - const float* src0_dd, const float* src1_dd, - float* dst_dd, - const queue_ptr& main_stream); +void ggml_sycl_op_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst); + +void ggml_sycl_op_rms_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst); + +void 
ggml_sycl_op_group_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst); + +void ggml_sycl_op_l2_norm(ggml_backend_sycl_context& ctx, ggml_tensor* dst); #endif // GGML_SYCL_NORM_HPP diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index 1244b231af738..bbcb356e97992 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -192,18 +192,15 @@ static void rope_neox_sycl( } } -void ggml_sycl_op_rope( - ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, - const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream) { - const ggml_tensor * src2 = dst->src[2]; +void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { - GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); - GGML_ASSERT(src0->type == dst->type); + GGML_ASSERT(dst->src[0]->type == dst->type); - const int64_t ne00 = src0->ne[0]; - const int64_t ne01 = src0->ne[1]; - const int64_t nr = ggml_nrows(src0); + const int64_t ne00 = dst->src[0]->ne[0]; + const int64_t ne01 = dst->src[0]->ne[1]; + const int64_t nr = ggml_nrows(dst->src[0]); //const int n_past = ((int32_t *) dst->op_params)[0]; const int n_dims = ((int32_t *) dst->op_params)[1]; @@ -228,49 +225,47 @@ void ggml_sycl_op_rope( const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; - const int32_t * pos = (const int32_t *) src1_dd; + const int32_t * pos = (const int32_t *) dst->src[1]->data; const float * freq_factors = nullptr; - if (src2 != nullptr) { - freq_factors = (const float *) src2->data; + if (dst->src[2] != nullptr) { + freq_factors = (const float *) dst->src[2]->data; } rope_corr_dims corr_dims; ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims.v); + dpct::queue_ptr main_stream = ctx.stream(); + SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + // compute if (is_neox) { - if (src0->type == GGML_TYPE_F32) { + if (dst->src[0]->type == GGML_TYPE_F32) { rope_neox_sycl( - (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, + (const float *)dst->src[0]->data, (float *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream ); - } else if (src0->type == GGML_TYPE_F16) { + } else if (dst->src[0]->type == GGML_TYPE_F16) { rope_neox_sycl( - (const sycl::half *)src0_dd, (sycl::half *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, + (const sycl::half *)dst->src[0]->data, (sycl::half *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream ); } else { GGML_ABORT("fatal error"); } } else { - if (src0->type == GGML_TYPE_F32) { + if (dst->src[0]->type == GGML_TYPE_F32) { rope_norm_sycl( - (const float *)src0_dd, (float *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, + (const float *)dst->src[0]->data, (float *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, attn_factor, corr_dims, freq_factors, main_stream ); - } else if (src0->type == GGML_TYPE_F16) { + } else if (dst->src[0]->type == GGML_TYPE_F16) { rope_norm_sycl( - (const sycl::half *)src0_dd, (sycl::half *)dst_dd, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor, + (const sycl::half *)dst->src[0]->data, 
(sycl::half *)dst->data, ne00, n_dims, nr, pos, freq_scale, ne01, freq_base, ext_factor,
                 attn_factor, corr_dims, freq_factors, main_stream
             );
         } else {
             GGML_ABORT("fatal error");
         }
     }
-
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(src1_dd);
-    GGML_UNUSED(ctx);
 }
diff --git a/ggml/src/ggml-sycl/rope.hpp b/ggml/src/ggml-sycl/rope.hpp
index 00354c3131bd7..a399bddb8a07b 100644
--- a/ggml/src/ggml-sycl/rope.hpp
+++ b/ggml/src/ggml-sycl/rope.hpp
@@ -15,8 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_rope(
-    ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
-    const float *src0_dd, const float *src1_dd, float *dst_dd, const queue_ptr &main_stream);
+void ggml_sycl_op_rope(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
 
 #endif // GGML_SYCL_ROPE_HPP
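
For orientation, the pattern this patch applies to every operator can be summarized in a single sketch. This is illustrative only and not part of the patch; ggml_sycl_op_example is a hypothetical name, while the helpers used (ctx.stream(), SYCL_CHECK, ggml_sycl_set_device, ggml_nelements) are the ones appearing in the hunks above. Each op now receives only the backend context and the destination tensor, pulls its inputs from dst->src[...], and derives the device pointers and queue itself instead of having them passed in by the removed ggml_sycl_op_flatten wrapper:

    // Minimal sketch of the refactored operator shape (assumed example, not in the patch).
    inline void ggml_sycl_op_example(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
        GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
        GGML_ASSERT(dst->type == GGML_TYPE_F32);

        // The queue and device now come from the backend context.
        dpct::queue_ptr main_stream = ctx.stream();
        SYCL_CHECK(ggml_sycl_set_device(ctx.device));

        // Device pointers are read straight from the tensors instead of being passed in.
        const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
        float       * dst_dd  = static_cast<float *>(dst->data);

        // ... launch the elementwise kernel over ggml_nelements(dst->src[0]) elements on main_stream ...
    }

Exception handling that ggml_sycl_op_flatten used to provide per call is now applied once, by wrapping ggml_sycl_compute_forward in a try/catch, as shown in the ggml-sycl.cpp hunk above.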