Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Metal] Add pixel_unshuffle & pixel_shuffle metal support,test=develop #10601

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lite/backends/metal/metal_kernel/texture/Common.metal
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ struct PixelShuffleParam {
int32_t upscale_factor;
};

struct PixelUnShuffleParam {
int32_t downscale_factor;
};

struct ExpandParam {
ushort fast;
ushort c;
Expand Down
32 changes: 4 additions & 28 deletions lite/backends/metal/metal_kernel/texture/PixelShuffle.metal
Original file line number Diff line number Diff line change
Expand Up @@ -17,50 +17,26 @@
#include "Common.metal"
using namespace metal;

kernel void pixel_shuffle(texture2d_array<float, access::sample> inTexture[[texture(0)]],
texture2d_array<float, access::write> outTexture[[texture(1)]],
kernel void pixel_shuffle(texture2d_array<ftype, access::sample> inTexture[[texture(0)]],
texture2d_array<ftype, access::write> outTexture[[texture(1)]],
constant PixelShuffleParam& param[[buffer(0)]],
uint3 gid[[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size())
return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);

int upscale_factor = param.upscale_factor;
int inX = gid.x / upscale_factor;
int inY = gid.y / upscale_factor;

float4 res;
for (int i = 0; i < 4; i++) {
int c = gid.z * 4 + i;
int inC = c * upscale_factor * upscale_factor + (gid.y % upscale_factor) * upscale_factor +
gid.x % upscale_factor;
float4 input = inTexture.read(uint2(inX, inY), inC / 4);
res[i] = input[inC % 4];
}

outTexture.write(res, gid.xy, gid.z);
}

kernel void pixel_shuffle_half(texture2d_array<half, access::sample> inTexture[[texture(0)]],
texture2d_array<half, access::write> outTexture[[texture(1)]],
constant PixelShuffleParam& param[[buffer(0)]],
uint3 gid[[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size())
return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);

int upscale_factor = param.upscale_factor;
int inX = gid.x / upscale_factor;
int inY = gid.y / upscale_factor;

half4 res;
ftype4 res;
for (int i = 0; i < 4; i++) {
int c = gid.z * 4 + i;
int inC = c * upscale_factor * upscale_factor + (gid.y % upscale_factor) * upscale_factor +
gid.x % upscale_factor;
half4 input = inTexture.read(uint2(inX, inY), inC / 4);
ftype4 input = inTexture.read(uint2(inX, inY), inC / 4);
res[i] = input[inC % 4];
}

Expand Down
47 changes: 47 additions & 0 deletions lite/backends/metal/metal_kernel/texture/PixelUnShuffle.metal
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Common.metal"
#include <metal_stdlib>
using namespace metal;

kernel void pixel_unshuffle(texture2d_array<ftype, access::sample> inTexture[[texture(0)]],
texture2d_array<ftype, access::write> outTexture[[texture(1)]],
constant PixelUnShuffleParam& param[[buffer(0)]],
uint3 gid[[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size())
return;

int downscale_factor = param.downscale_factor;
int outX = gid.x * downscale_factor;
int outY = gid.y * downscale_factor;

ftype4 res = ftype4(0.0);

for (int i = 0; i < 4; i++) {
int c = gid.z * 4 + i;
int outC = c / (downscale_factor * downscale_factor);
int offset = c % (downscale_factor * downscale_factor);
int offset_h = offset / downscale_factor;
int offset_w = offset % downscale_factor;

int readX = outX + offset_w;
int readY = outY + offset_h;

ftype4 input = inTexture.read(uint2(readX, readY), outC / 4);
res[i] = input[outC % 4];
}
outTexture.write(res, gid.xy, gid.z);
}
3 changes: 2 additions & 1 deletion lite/kernels/metal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ add_kernel(yolo_box_metal_image METAL basic SRCS image_op/yolo_box_image_compute
# add_kernel(grid_sampler_metal_image METAL basic SRCS image_op/grid_sampler_image_compute.mm)
# add_kernel(instance_norm_metal_image METAL basic SRCS image_op/instance_norm_image_compute.mm)
# add_kernel(lrn_metal_image METAL basic SRCS image_op/lrn_image_compute.mm)
# add_kernel(pixel_shuffle_metal_image METAL basic SRCS image_op/pixel_shuffle_image_compute.mm)
add_kernel(pixel_shuffle_metal_image METAL basic SRCS image_op/pixel_shuffle_image_compute.mm)
add_kernel(pixel_unshuffle_metal_image METAL basic SRCS image_op/pixel_unshuffle_image_compute.mm)
# add_kernel(rsqrt_metal_image METAL basic SRCS image_op/rsqrt_image_compute.mm)
# add_kernel(tanh_metal_image METAL basic SRCS image_op/tanh_image_compute.mm)
# add_kernel(prior_box_metal_image METAL basic SRCS image_op/prior_box_image_compute.mm)
Expand Down
4 changes: 4 additions & 0 deletions lite/kernels/metal/image_op/metal_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ struct PixelShuffleMetalParam {
int upscale_factor;
};

struct PixelUnShuffleMetalParam {
int downscale_factor;
};

struct ShuffleChannelMetalParam {
uint32_t group;
uint32_t channel_per_group;
Expand Down
21 changes: 13 additions & 8 deletions lite/kernels/metal/image_op/pixel_shuffle_image_compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#ifndef LITE_KERNELS_METAL_IMAGE_OP_PIXEL_SHUFFLE_IMAGE_COMPUTE_H_
#define LITE_KERNELS_METAL_IMAGE_OP_PIXEL_SHUFFLE_IMAGE_COMPUTE_H_

#include <memory>

Expand All @@ -32,9 +33,8 @@ namespace lite {
namespace kernels {
namespace metal {

template <typename P, PrecisionType PTYPE>
class PixelShuffleImageCompute
: public KernelLite<TARGET(kMetal), PTYPE, DATALAYOUT(kMetalTexture2DArray)> {
: public KernelLite<TARGET(kMetal), PRECISION(kFloat), DATALAYOUT(kMetalTexture2DArray)> {
using param_t = operators::PixelShuffleParam;

public:
Expand All @@ -43,18 +43,23 @@ class PixelShuffleImageCompute
void SaveOutput() override {
MetalDebug::SaveOutput("pixel_shuffle", output_buffer_);
};
virtual ~PixelShuffleImageCompute();

private:
void run_without_mps();
void setup_without_mps();

const MetalImage* input_buffer_;
MetalImage* output_buffer_;
std::shared_ptr<MetalBuffer> param_buffer_;
std::shared_ptr<MetalKernel> kernel_;
std::shared_ptr<MetalQueue> queue_;
std::shared_ptr<MetalEncoder> encoder_;
MetalImage* output_buffer_{nullptr};
std::shared_ptr<MetalBuffer> params_buffer_;

id<MTLComputePipelineState> pipline_;
std::string function_name_;
MetalContext* metal_context_;
};

} // namespace metal
} // namespace kernels
} // namespace lite
} // namespace paddle
#endif LITE_KERNELS_METAL_IMAGE_OP_PIXEL_SHUFFLE_IMAGE_COMPUTE_H_
89 changes: 46 additions & 43 deletions lite/kernels/metal/image_op/pixel_shuffle_image_compute.mm
Original file line number Diff line number Diff line change
Expand Up @@ -13,78 +13,76 @@
// limitations under the License.

#include "lite/kernels/metal/image_op/pixel_shuffle_image_compute.h"
#include "lite/backends/metal/metal_context_imp.h"
#include "lite/core/op_registry.h"
#include "lite/core/tensor.h"
#include "lite/kernels/metal/image_op/metal_params.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace metal {

template <typename P, PrecisionType PTYPE>
void PixelShuffleImageCompute<P, PTYPE>::PrepareForRun() {
auto& context = this->ctx_->template As<ContextMetal>();
void PixelShuffleImageCompute::PrepareForRun() {
auto& context = ctx_->As<MTLContext>();
metal_context_ = (MetalContext*)context.context();
auto device = metal_context_->GetDefaultDevice();

const auto& param = this->template Param<param_t>();
const auto& param = this->Param<param_t>();
auto output_dims = param.output->dims();

input_buffer_ = param.x->template data<P, MetalImage>();
output_buffer_ = param.output->template mutable_data<P, MetalImage>(output_dims);

PixelShuffleMetalParam metal_param{param.upscale_factor};
param_buffer_ = metal_context_->CreateBuffer(
*device, &metal_param, sizeof(metal_param), METAL_ACCESS_FLAG::CPUWriteOnly);
#ifdef LITE_WITH_METAL_FULL
#else
output_buffer_ = param.output->mutable_data<MetalHalf, MetalImage>(metal_context_, output_dims);
input_buffer_ = param.x->data<MetalHalf, MetalImage>();
#endif
setup_without_mps();
}

std::string function_name = "";
if (std::is_same<float, P>::value) {
function_name = "pixel_shuffle";
} else if (std::is_same<MetalHalf, P>::value) {
function_name = "pixel_shuffle_half";
void PixelShuffleImageCompute::Run() {
@autoreleasepool {
run_without_mps();
}
assert(!function_name.empty());

kernel_ = metal_context_->GetKernel(*device, function_name);
queue_ = metal_context_->GetDefaultQueue(*device);
}

template <typename P, PrecisionType PTYPE>
void PixelShuffleImageCompute<P, PTYPE>::Run() {
auto output_width = output_buffer_->texture_width_;
auto output_height = output_buffer_->texture_height_;
auto output_array_length = output_buffer_->array_length_;
void PixelShuffleImageCompute::run_without_mps() {
auto pipline = pipline_;
auto outTexture = output_buffer_->image();
auto backend = (__bridge MetalContextImp*)metal_context_->backend();

auto encoder = [backend commandEncoder];
[encoder setTexture:input_buffer_->image() atIndex:(0)];
[encoder setTexture:output_buffer_->image() atIndex:(1)];
[encoder setBuffer:params_buffer_->buffer() offset:(0) atIndex:(0)];

[backend dispatchEncoder:encoder pipline:pipline outTexture:outTexture];
[backend commit];
}

auto encoder =
std::make_shared<MetalEncoder>(metal_context_->cmd_buf_.get(), &kernel_->program_);
MetalUint3 global_work_size = {static_cast<MetalUint>(output_width),
static_cast<MetalUint>(output_height),
static_cast<MetalUint>(output_array_length)};
void PixelShuffleImageCompute::setup_without_mps() {
const auto& param = this->Param<param_t>();

[encoder->metal_command_encoder_ setTexture:(input_buffer_->image()) atIndex:(0)];
[encoder->metal_command_encoder_ setTexture:(output_buffer_->image()) atIndex:(1)];
[encoder->metal_command_encoder_ setBuffer:(param_buffer_->buffer()) offset:(0) atIndex:(0)];
PixelShuffleMetalParam params{param.upscale_factor};
params_buffer_ = std::make_shared<MetalBuffer>(metal_context_, sizeof(params), &params);
function_name_ = "pixel_shuffle";
// pipline
auto backend = (__bridge MetalContextImp*)metal_context_->backend();
pipline_ = [backend pipline:function_name_];
}

kernel_->Execute(*encoder, global_work_size, false);
PixelShuffleImageCompute::~PixelShuffleImageCompute() {
TargetWrapperMetal::FreeImage(output_buffer_);
}

} // namespace metal
} // namespace kernels
} // namespace lite
} // namespace paddle

template class paddle::lite::kernels::metal::PixelShuffleImageCompute<float, PRECISION(kFloat)>;
template class paddle::lite::kernels::metal::PixelShuffleImageCompute<MetalHalf, PRECISION(kFP16)>;
typedef paddle::lite::kernels::metal::PixelShuffleImageCompute<float, PRECISION(kFloat)>
MetalPixelShuffleFp32;
typedef paddle::lite::kernels::metal::PixelShuffleImageCompute<MetalHalf, PRECISION(kFP16)>
MetalPixelShuffleFp16;

REGISTER_LITE_KERNEL(pixel_shuffle,
kMetal,
kFloat,
kMetalTexture2DArray,
MetalPixelShuffleFp32,
paddle::lite::kernels::metal::PixelShuffleImageCompute,
def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kMetal),
Expand All @@ -96,7 +94,12 @@
DATALAYOUT(kMetalTexture2DArray))})
.Finalize();

REGISTER_LITE_KERNEL(pixel_shuffle, kMetal, kFP16, kMetalTexture2DArray, MetalPixelShuffleFp16, def)
REGISTER_LITE_KERNEL(pixel_shuffle,
kMetal,
kFP16,
kMetalTexture2DArray,
paddle::lite::kernels::metal::PixelShuffleImageCompute,
def)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kMetal), PRECISION(kFP16), DATALAYOUT(kMetalTexture2DArray))})
.BindOutput("Out",
Expand Down
71 changes: 71 additions & 0 deletions lite/kernels/metal/image_op/pixel_unshuffle_image_compute.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef LITE_KERNELS_METAL_IMAGE_OP_PIXEL_UNSHUFFLE_IMAGE_COMPUTE_H_
#define LITE_KERNELS_METAL_IMAGE_OP_PIXEL_UNSHUFFLE_IMAGE_COMPUTE_H_

#include <memory>

#include "lite/core/kernel.h"
#include "lite/core/tensor.h"
#include "lite/operators/op_params.h"

#ifdef LITE_WITH_PROFILE
#include "lite/core/profile/profiler.h"
#endif

#include "lite/backends/metal/metal_context.h"
#include "lite/backends/metal/metal_debug.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace metal {

class PixelUnShuffleImageCompute
: public KernelLite<TARGET(kMetal), PRECISION(kFloat), DATALAYOUT(kMetalTexture2DArray)> {
using param_t = operators::PixelUnShuffleParam;

public:
void PrepareForRun() override;
void Run() override;
void SaveOutput() override {
MetalDebug::SaveOutput("pixel_unshuffle", output_buffer_);
};
virtual ~PixelUnShuffleImageCompute();

private:
bool use_mps_{false};
void* mps_pool_op_{nullptr};
void* mps_input_image_{nullptr};
void* mps_output_image_{nullptr};

void run_without_mps();
void setup_without_mps();

const MetalImage* input_buffer_;
MetalImage* output_buffer_{nullptr};
std::shared_ptr<MetalBuffer> params_buffer_;

id<MTLComputePipelineState> pipline_;
std::string function_name_;
MetalContext* metal_context_;
};

} // namespace metal
} // namespace kernels
} // namespace lite
} // namespace paddle

#endif LITE_KERNELS_METAL_IMAGE_OP_PIXEL_UNSHUFFLE_IMAGE_COMPUTE_H_
Loading