Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement backward computation for more operators #921

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
47 changes: 46 additions & 1 deletion include/ggml.h
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ extern "C" {
GGML_OP_ROPE,
GGML_OP_ROPE_BACK,
GGML_OP_CLAMP,
GGML_OP_CLAMP_BACK,
GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL,
GGML_OP_CONV_TRANSPOSE_2D,
Expand Down Expand Up @@ -535,10 +536,13 @@ extern "C" {
GGML_UNARY_OP_STEP,
GGML_UNARY_OP_TANH,
GGML_UNARY_OP_ELU,
GGML_UNARY_OP_ELU_BACK,
GGML_UNARY_OP_RELU,
GGML_UNARY_OP_SIGMOID,
GGML_UNARY_OP_GELU,
GGML_UNARY_OP_GELU_BACK,
GGML_UNARY_OP_GELU_QUICK,
GGML_UNARY_OP_GELU_QUICK_BACK,
GGML_UNARY_OP_SILU,
GGML_UNARY_OP_HARDSWISH,
GGML_UNARY_OP_HARDSIGMOID,
Expand Down Expand Up @@ -1074,6 +1078,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_elu_back(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_elu_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_relu(
struct ggml_context * ctx,
struct ggml_tensor * a);
Expand Down Expand Up @@ -1102,6 +1114,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_back(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_quick(
struct ggml_context * ctx,
struct ggml_tensor * a);
Expand All @@ -1110,6 +1130,14 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_quick_back(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_gelu_quick_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a);

GGML_API struct ggml_tensor * ggml_silu(
struct ggml_context * ctx,
struct ggml_tensor * a);
Expand Down Expand Up @@ -1575,13 +1603,30 @@ extern "C" {
float beta_slow);

// clamp
// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_clamp(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_clamp_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_clamp_back(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_clamp_back_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
float min,
float max);

GGML_API struct ggml_tensor * ggml_im2col(
struct ggml_context * ctx,
struct ggml_tensor * a,
Expand Down
4 changes: 4 additions & 0 deletions src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2276,6 +2276,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_CLAMP:
ggml_cuda_op_clamp(ctx, dst);
break;
case GGML_OP_CLAMP_BACK:
ggml_cuda_op_clamp_back(ctx, dst);
break;
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
Expand Down Expand Up @@ -2868,6 +2871,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
case GGML_OP_SIN:
case GGML_OP_COS:
case GGML_OP_CLAMP:
case GGML_OP_CLAMP_BACK:
case GGML_OP_CONT:
case GGML_OP_DIAG_MASK_INF:
case GGML_OP_SOFT_MAX:
Expand Down
32 changes: 32 additions & 0 deletions src/ggml-cuda/clamp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,21 @@ static void clamp_f32_cuda(const float * x, float * dst, const float min, const
clamp_f32<<<num_blocks, CUDA_CLAMP_BLOCK_SIZE, 0, stream>>>(x, dst, min, max, k);
}

// Element-wise backward kernel for clamp: writes 1.0f where x[i] lies inside
// [min, max] and 0.0f where the forward clamp saturated. One thread per
// element; k is the total element count.
static __global__ void clamp_back_f32(const float * x, float * dst, const float min, const float max, const int k) {
    const int idx = blockDim.x*blockIdx.x + threadIdx.x;

    if (idx < k) {
        const float v = x[idx];
        // predicate kept exactly as the forward-file convention: out-of-range -> 0, otherwise (incl. NaN) -> 1
        dst[idx] = v < min || v > max ? 0.0f : 1.0f;
    }
}

// Host-side launcher for clamp_back_f32: 1D grid, one thread per element,
// enqueued asynchronously on the given stream.
static void clamp_back_f32_cuda(const float * x, float * dst, const float min, const float max, const int k, cudaStream_t stream) {
    const int block_size = CUDA_CLAMP_BACK_BLOCK_SIZE;
    const int num_blocks = (k + block_size - 1) / block_size; // ceil-div so the tail elements are covered
    clamp_back_f32<<<num_blocks, block_size, 0, stream>>>(x, dst, min, max, k);
}


void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
const ggml_tensor * src0 = dst->src[0];
Expand All @@ -32,3 +47,20 @@ void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {

clamp_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
}

// Backward op for GGML_OP_CLAMP: fills dst with the 0/1 derivative mask of
// clamp(src0, min, max) — 1.0f where src0 was inside [min, max], 0.0f where
// the forward pass saturated. The clamp range is packed as two floats
// { min, max } in dst->op_params, mirroring ggml_cuda_op_clamp.
// NOTE(review): this writes only the mask; the incoming gradient is presumably
// multiplied in by the graph builder elsewhere — confirm against ggml.c.
void ggml_cuda_op_clamp_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
    const ggml_tensor * src0 = dst->src[0];
    const float * src0_d = (const float *)src0->data;
    float * dst_d = (float *)dst->data;
    cudaStream_t stream = ctx.stream();

    GGML_ASSERT(src0->type == GGML_TYPE_F32);
    GGML_ASSERT( dst->type == GGML_TYPE_F32);
    // the kernel indexes the data as a flat dense array, so the input must be contiguous
    GGML_ASSERT(ggml_is_contiguous(src0));

    // unpack { min, max } from op_params (stored as raw float bits)
    float min;
    float max;
    memcpy(&min, dst->op_params, sizeof(float));
    memcpy(&max, (float *) dst->op_params + 1, sizeof(float));

    clamp_back_f32_cuda(src0_d, dst_d, min, max, ggml_nelements(src0), stream);
}
2 changes: 2 additions & 0 deletions src/ggml-cuda/clamp.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "common.cuh"

#define CUDA_CLAMP_BLOCK_SIZE 256
#define CUDA_CLAMP_BACK_BLOCK_SIZE 256

void ggml_cuda_op_clamp(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_clamp_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
Loading
Loading