From 4758e3ccd42fe995770f66287b96acc52b01900e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Wed, 30 Oct 2024 23:17:23 +0100
Subject: [PATCH 1/2] Generate LB GPU PackInfo kernels

---
 maintainer/benchmarks/lb.py                   |   19 +-
 maintainer/walberla_kernels/Readme.md         |    1 -
 .../custom_additional_extensions.py           |   45 +
 .../walberla_kernels/generate_lb_kernels.py   |  116 +-
 .../walberla_kernels/pystencils_espresso.py   |    7 +-
 .../templates/preprocessor.tmpl.cuh           |  114 ++
 .../src/lattice_boltzmann/LBWalberlaImpl.hpp  |   53 +-
 .../generated_kernels/CMakeLists.txt          |    4 +
 .../Dynamic_UBB_double_precisionCUDA.cu       |   24 +-
 .../Dynamic_UBB_single_precisionCUDA.cu       |   24 +-
 .../PackInfoPdfDoublePrecision.cpp            |    2 -
 .../PackInfoPdfDoublePrecisionCUDA.cu         | 1423 +++++++++++++++++
 .../PackInfoPdfDoublePrecisionCUDA.h          |   64 +
 .../PackInfoPdfSinglePrecision.cpp            |    2 -
 .../PackInfoPdfSinglePrecisionCUDA.cu         | 1423 +++++++++++++++++
 .../PackInfoPdfSinglePrecisionCUDA.h          |   64 +
 .../PackInfoVecDoublePrecision.cpp            |    2 -
 .../PackInfoVecDoublePrecisionCUDA.cu         |  243 +++
 .../PackInfoVecDoublePrecisionCUDA.h          |   64 +
 .../PackInfoVecSinglePrecision.cpp            |    2 -
 .../PackInfoVecSinglePrecisionCUDA.cu         |  243 +++
 .../PackInfoVecSinglePrecisionCUDA.h          |   64 +
 .../src/lattice_boltzmann/lb_kernels.cuh      |    8 +
 src/walberla_bridge/tests/CMakeLists.txt      |    6 +-
 testsuite/scripts/benchmarks/CMakeLists.txt   |    2 +-
 25 files changed, 3927 insertions(+), 92 deletions(-)
 create mode 100644 maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu
 create mode 100644 src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h

diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py
index ea42b420051..04453c5ea7b 100644
--- a/maintainer/benchmarks/lb.py
+++ b/maintainer/benchmarks/lb.py
@@ -31,7 +31,7 @@
 parser.add_argument("--particles_per_core", metavar="N", action="store",
                     type=int, default=125, required=False,
                     help="Number of particles per core")
-parser.add_argument("--box_l", action="store",
+parser.add_argument("--box_l", action="store", nargs="+",
                     type=int, default=argparse.SUPPRESS, required=False,
                     help="Box length (cubic box)")
 parser.add_argument("--lb_sites_per_particle", metavar="N_LB", action="store",
@@ -45,6 +45,8 @@
                     help="Using single-precision floating point accuracy")
 parser.add_argument("--gpu", action=argparse.BooleanOptionalAction,
                     default=False, required=False, help="Use GPU implementation")
+parser.add_argument("--multi-gpu", action=argparse.BooleanOptionalAction,
+                    default=False, required=False, help="Use multi-GPU implementation")
 parser.add_argument("--output", metavar="FILEPATH", action="store",
                     type=str, required=False, default="benchmarks.csv",
                     help="Output file (default: benchmarks.csv)")
@@ -83,9 +85,9 @@
 n_proc = system.cell_system.get_state()["n_nodes"]
 n_part = n_proc * args.particles_per_core
 if n_part == 0:
-    box_l = args.box_l
+    box_l = 3 * args.box_l if len(args.box_l) == 1 else args.box_l
     agrid = 1.
-    lb_grid = args.box_l
+    lb_grid = box_l
     measurement_steps = 80
 else:
     # volume of N spheres with radius r: N * (4/3*pi*r^3)
@@ -96,13 +98,16 @@
     agrid = box_l / lb_grid
     measurement_steps = max(50, int(120**3 / lb_grid**3))
     measurement_steps = 40
+    lb_grid = 3 * [lb_grid]
+    box_l = 3 * [box_l]
 
-print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]")
+print(f"box length: {box_l}")
+print(f"LB shape: {lb_grid}")
 print(f"LB agrid: {agrid:.3f}")
 
 # System
 #############################################################
-system.box_l = 3 * (box_l,)
+system.box_l = box_l
 
 # Integration parameters
 #############################################################
@@ -135,8 +140,10 @@
 # LB fluid setup
 #############################################################
 lb_class = espressomd.lb.LBFluidWalberla
-if args.gpu:
+if args.gpu or args.multi_gpu:
     lb_class = espressomd.lb.LBFluidWalberlaGPU
+if args.multi_gpu:
+    system.cuda_init_handle.call_method("set_device_id_per_rank")
 lbf = lb_class(agrid=agrid, tau=system.time_step, kinematic_viscosity=1.,
                density=1., single_precision=args.single_precision)
 system.lb = lbf
diff --git a/maintainer/walberla_kernels/Readme.md b/maintainer/walberla_kernels/Readme.md
index ef7f2fb0c5e..39f1a7e41a6 100644
--- a/maintainer/walberla_kernels/Readme.md
+++ b/maintainer/walberla_kernels/Readme.md
@@ -49,7 +49,6 @@ generate_lb_kernels --single-precision
 generate_lb_kernels --gpu
 generate_lb_kernels --gpu --single-precision
 format_lb_kernels
-git diff src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_*CUDA*.cu # verify pragmas
 
 # EK kernels
 cd $(git rev-parse --show-toplevel)/src/walberla_bridge/src/electrokinetics/generated_kernels/
diff --git a/maintainer/walberla_kernels/custom_additional_extensions.py b/maintainer/walberla_kernels/custom_additional_extensions.py
index 19d8e3f22a2..3115f7ffe7e 100644
--- a/maintainer/walberla_kernels/custom_additional_extensions.py
+++ b/maintainer/walberla_kernels/custom_additional_extensions.py
@@ -349,3 +349,48 @@ def generate_kernel_selector(
         "templates/ReactionKernelSelector.tmpl.h").render(**context)
 
     generation_context.write_file(f"{class_name}_all.h", header)
+
+
+def generate_device_preprocessor(kernel, defines=()):
+    """
+    Generate device preprocessor directives as ``[push, pop]`` code sections.
+    """
+    pragmas = {  # diagnostics to silence, per kernel kind and compiler
+        "packinfo": {
+            "nvcc": ["diag_suppress 177 // unused variable"],
+            "clang_host": ["-Wunused-variable"],
+            "clang_dev": ["-Wunused-variable"],
+            "gcc": ["-Wunused-variable"],
+        },
+        "ubb_boundary": {
+            "nvcc": ["diag_suppress 177 // unused variable"],
+            "clang_host": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"],  # nopep8
+            "clang_dev": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion", "-Wsign-compare"],  # nopep8
+            "gcc": ["-Wstrict-aliasing", "-Wunused-variable", "-Wconversion"],
+        },
+    }
+
+    defines_table = {  # macro expansions per compiler
+        "nvcc": {"RESTRICT": "__restrict__", "FUNC_PREFIX": "__global__"},
+        "msvc": {"RESTRICT": "__restrict", "FUNC_PREFIX": ""},
+        "clang_host": {"RESTRICT": "__restrict__", "FUNC_PREFIX": ""},
+        "clang_dev": {"RESTRICT": "__restrict__", "FUNC_PREFIX": "__global__"},
+        "gcc": {"RESTRICT": "__restrict__", "FUNC_PREFIX": ""},
+        "other": {"RESTRICT": "", "FUNC_PREFIX": ""},
+    }
+
+    context = {
+        "pragmas": pragmas[kernel],
+        "defines_table": defines_table,
+        "defines": defines,
+    }
+
+    custom_env = jinja2.Environment(
+        loader=jinja2.FileSystemLoader(pathlib.Path(__file__).parent),
+        undefined=jinja2.StrictUndefined
+    )
+
+    content = custom_env.get_template(
+        "templates/preprocessor.tmpl.cuh").render(**context)
+
+    return content.split("\n/* section */\n")[1:]  # drop the license header
diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py
index 21300af8947..bef9badc541 100644
--- a/maintainer/walberla_kernels/generate_lb_kernels.py
+++ b/maintainer/walberla_kernels/generate_lb_kernels.py
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2020-2023 The ESPResSo project
+# Copyright (C) 2020-2024 The ESPResSo project
 #
 # This file is part of ESPResSo.
 #
@@ -17,6 +17,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+import re
 import argparse
 import packaging.specifiers
 
@@ -40,6 +41,7 @@
 import relaxation_rates
 import walberla_lbm_generation
 import code_generation_context
+import custom_additional_extensions
 
 parser = argparse.ArgumentParser(description="Generate the waLBerla kernels.")
 parser.add_argument("--single-precision", action="store_true", required=False,
@@ -65,6 +67,24 @@ def paramlist(parameters, keys):
             yield parameters[key]
 
 
+def get_ext_header(target_suffix):
+    return {"CUDA": "h"}.get(target_suffix, "h")  # "h" for all targets
+
+
+def get_ext_source(target_suffix):
+    return {"CUDA": "cu"}.get(target_suffix, "cpp")  # "cpp" unless CUDA
+
+
+def patch_file(class_name, extension, target_suffix, patch):
+    with open(f"{class_name}.{extension}", "r+") as f:  # file must exist
+        old_content = f.read()
+        new_content = patch(old_content, target_suffix)
+        if new_content != old_content:  # leave unpatched files untouched
+            f.seek(0)  # rewind, then overwrite the file in place
+            f.truncate()
+            f.write(new_content)
+
+
 with code_generation_context.CodeGeneration() as ctx:
     ctx.double_accuracy = not args.single_precision
     if target == ps.Target.GPU:
@@ -196,26 +216,57 @@ def paramlist(parameters, keys):
     # generate PackInfo
     assignments = pystencils_espresso.generate_pack_info_pdfs_field_assignments(
         fields, streaming_pattern="pull")
-    spec = pystencils_espresso.generate_pack_info_vector_field_specifications(
+    spec = pystencils_espresso.generate_pack_info_field_specifications(
         config, stencil, force_field.layout)
-    for params, target_suffix in paramlist(parameters, ["CPU"]):
+
+    def patch_packinfo_header(content, target_suffix):
+        if target_suffix in ["", "AVX"]:  # drop TODO, pad buffers by 1 elem
+            token = "\n       //TODO: optimize by generating kernel for this case\n"
+            assert token in content
+            content = content.replace(token, "\n")
+            ft = "float" if "SinglePrecision" in content else "double"
+            token = " pack(dir, outBuffer.forward(dataSize)"
+            assert token in content
+            content = content.replace(token, f"{token[:-1]} + sizeof({ft}))")
+            token = " unpack(dir, buffer.skip(dataSize)"
+            assert token in content
+            content = content.replace(token, f"{token[:-1]} + sizeof({ft}))")
+        elif target_suffix in ["CUDA"]:  # drop FUNC_PREFIX and GNUC block
+            token = "#define FUNC_PREFIX __global__"
+            assert token in content
+            content = content.replace(token, "")
+            content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif\n\n", "", content)
+        return content
+
+    def patch_packinfo_kernel(content, target_suffix):
+        if target_suffix in ["", "AVX"]:
+            # fix MPI buffer: move byte_buffer to next sizeof-aligned address
+            m = re.search(r"(float|double) *\* *buffer = reinterpret_cast<(?:float|double) *\*>\(byte_buffer\);\n", content)  # nopep8
+            assert m is not None
+            content = content.replace(m.group(0), f"byte_buffer += sizeof({m.group(1)}) - (reinterpret_cast<std::size_t>(byte_buffer) - (reinterpret_cast<std::size_t>(byte_buffer) / sizeof({m.group(1)})) * sizeof({m.group(1)}));\n  {m.group(0)}")  # nopep8
+        if target_suffix in ["CUDA"]:  # add pragmas and missing includes
+            token = "#define FUNC_PREFIX __global__"
+            assert token in content
+            push, _ = custom_additional_extensions.generate_device_preprocessor(
+                "packinfo", defines=("RESTRICT",))
+            content = content.replace(token, f"{token}\n{push}")
+            token = '#include "PackInfo'
+            assert token in content
+            content = content.replace(token, f'#include "core/DataTypes.h"\n#include "core/cell/CellInterval.h"\n#include "domain_decomposition/IBlock.h"\n#include "stencil/Directions.h"\n\n{token}')  # nopep8
+        return content
+
+    for params, target_suffix in paramlist(parameters, ["CPU", "GPU"]):
         pystencils_walberla.generate_pack_info_from_kernel(
             ctx, f"PackInfoPdf{precision_prefix}{target_suffix}", assignments,
             kind="pull", **params)
         pystencils_walberla.generate_pack_info(
             ctx, f"PackInfoVec{precision_prefix}{target_suffix}", spec, **params)
-        if target_suffix == "CUDA":
-            continue
-        token = "\n       //TODO: optimize by generating kernel for this case\n"
-        for field_suffix in ["Pdf", "Vec"]:
-            class_name = f"PackInfo{field_suffix}{precision_prefix}{target_suffix}"  # nopep8
-            with open(f"{class_name}.h", "r+") as f:
-                content = f.read()
-                assert token in content
-                content = content.replace(token, "\n")
-                f.seek(0)
-                f.truncate()
-                f.write(content)
+        for suffix in ["Pdf", "Vec"]:
+            class_name = f"PackInfo{suffix}{precision_prefix}{target_suffix}"
+            patch_file(class_name, get_ext_header(target_suffix),
+                       target_suffix, patch_packinfo_header)
+            patch_file(class_name, get_ext_source(target_suffix),
+                       target_suffix, patch_packinfo_kernel)
 
     # boundary conditions
     ubb_dynamic = lbmpy_espresso.UBB(
@@ -223,17 +274,28 @@ def paramlist(parameters, keys):
     ubb_data_handler = lbmpy_espresso.BounceBackSlipVelocityUBB(
         method.stencil, ubb_dynamic)
 
-    for _, target_suffix in paramlist(parameters, ("GPU", "CPU")):
+    # pylint: disable=unused-argument
+    def patch_boundary_header(content, target_suffix):  # replace real_t
+        return content.replace("real_t", config.data_type.default_factory().c_name)  # nopep8
+
+    def patch_boundary_kernel(content, target_suffix):
+        if target_suffix in ["CUDA"]:  # swap in generated push/pop sections
+            push, pop = custom_additional_extensions.generate_device_preprocessor(
+                "ubb_boundary", defines=("RESTRICT",))
+            content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif(?=\n\n|\n//)", "", content)  # nopep8
+            content = re.sub(r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)", lambda _: push, content, count=1)  # nopep8
+            content = re.sub(r"#ifdef __CUDACC__[\s\S]+?#endif(?=\n\n|\n//)", lambda _: pop, content, count=1)  # nopep8
+            assert push in content
+            assert pop in content
+        return content
+
+    for _, target_suffix in paramlist(parameters, ("CPU", "GPU")):
+        class_name = f"Dynamic_UBB_{precision_suffix}{target_suffix}"
         lbmpy_walberla.generate_boundary(
-            ctx, f"Dynamic_UBB_{precision_suffix}{target_suffix}", ubb_dynamic,
-            method, additional_data_handler=ubb_data_handler,
+            ctx, class_name, ubb_dynamic, method,
+            additional_data_handler=ubb_data_handler,
             streaming_pattern=streaming_pattern, target=target)
-
-        with open(f"Dynamic_UBB_{precision_suffix}{target_suffix}.h", "r+") as f:
-            content = f.read()
-            f.seek(0)
-            f.truncate(0)
-            # patch for floating point accuracy
-            content = content.replace("real_t",
-                                      config.data_type.default_factory().c_name)
-            f.write(content)
+        patch_file(class_name, get_ext_header(target_suffix),
+                   target_suffix, patch_boundary_header)
+        patch_file(class_name, get_ext_source(target_suffix),
+                   target_suffix, patch_boundary_kernel)
diff --git a/maintainer/walberla_kernels/pystencils_espresso.py b/maintainer/walberla_kernels/pystencils_espresso.py
index 3cf6edfbf39..1fd3321aeb9 100644
--- a/maintainer/walberla_kernels/pystencils_espresso.py
+++ b/maintainer/walberla_kernels/pystencils_espresso.py
@@ -239,7 +239,8 @@ def generate_pack_info_pdfs_field_assignments(fields, streaming_pattern):
     return lbm_update_rule.all_assignments
 
 
-def generate_pack_info_vector_field_specifications(config, stencil, layout):
+def generate_pack_info_field_specifications(
+        config, stencil, layout, vec_len=3):
     import collections
     import itertools
     field = ps.Field.create_generic(
@@ -248,7 +249,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout):
         data_type_np[config.data_type.default_factory().c_name],
         index_dimensions=1,
         layout=layout,
-        index_shape=(3,)
+        index_shape=(vec_len,)
     )
     q = len(stencil)
     coord = itertools.product(*[(-1, 0, 1)] * 3)
@@ -257,7 +258,7 @@ def generate_pack_info_vector_field_specifications(config, stencil, layout):
     else:
         dirs = tuple((i, j, k) for i, j, k in coord)
     spec = collections.defaultdict(set)
-    spec[dirs] = {field[0, 0, 0](i) for i in range(3)}
+    spec[dirs] = {field[0, 0, 0](i) for i in range(vec_len)}
     return spec
 
 
diff --git a/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh b/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh
new file mode 100644
index 00000000000..16cefedbaea
--- /dev/null
+++ b/maintainer/walberla_kernels/templates/preprocessor.tmpl.cuh
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2024 The ESPResSo project
+ * Copyright (C) 2024 The waLBerla project
+ *
+ * This file is part of ESPResSo.
+ *
+ * ESPResSo is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * ESPResSo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* section */
+
+#if defined(__NVCC__)
+{% for name in defines -%}
+#define {{name}} {{defines_table["nvcc"][name]}}
+{% endfor -%}
+{% if pragmas["nvcc"] -%}
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+{% for pragma in pragmas["nvcc"] -%}
+#pragma nv_{{pragma}}
+{% endfor -%}
+#else
+#pragma push
+{% for pragma in pragmas["nvcc"] -%}
+#pragma {{pragma}}
+{% endfor -%}
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+{% endif -%}
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+{% for name in defines -%}
+#define {{name}} {{defines_table["clang_dev"][name]}}
+{% endfor -%}
+{% if pragmas["clang_dev"] -%}
+#pragma clang diagnostic push
+{% for pragma in pragmas["clang_dev"] -%}
+#pragma clang diagnostic ignored "{{pragma}}"
+{% endfor -%}
+{% endif -%}
+#else
+// clang compiling CUDA code in host mode
+{% for name in defines -%}
+#define {{name}} {{defines_table["clang_host"][name]}}
+{% endfor -%}
+{% if pragmas["clang_host"] -%}
+#pragma clang diagnostic push
+{% for pragma in pragmas["clang_host"] -%}
+#pragma clang diagnostic ignored "{{pragma}}"
+{% endfor -%}
+{% endif -%}
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+{% for name in defines -%}
+#define {{name}} {{defines_table["gcc"][name]}}
+{% endfor -%}
+{% if pragmas["gcc"] -%}
+#pragma GCC diagnostic push
+{% for pragma in pragmas["gcc"] -%}
+#pragma GCC diagnostic ignored "{{pragma}}"
+{% endfor -%}
+{% endif -%}
+#elif defined(_MSC_VER)
+{% for name in defines -%}
+#define {{name}} {{defines_table["msvc"][name]}}
+{% endfor -%}
+#else
+{% for name in defines -%}
+#define {{name}} {{defines_table["other"][name]}}
+{% endfor -%}
+#endif
+
+/* section */
+
+#if defined(__NVCC__)
+{% if pragmas["nvcc"] -%}
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic pop
+#else
+#pragma pop
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+{% endif -%}
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+{% if pragmas["clang_dev"] -%}
+#pragma clang diagnostic pop
+{% endif -%}
+#else
+{% if pragmas["clang_host"] -%}
+// clang compiling CUDA code in host mode
+#pragma clang diagnostic pop
+{% endif -%}
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+{% if pragmas["gcc"] -%}
+#pragma GCC diagnostic pop
+{% endif -%}
+#endif
diff --git a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
index 6f1fedae105..8986fed7b4d 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/LBWalberlaImpl.hpp
@@ -119,11 +119,10 @@ class LBWalberlaImpl : public LBWalberlaBase {
     using VectorField = field::GhostLayerField<FT, uint_t{3u}>;
     template <class Field>
     using PackInfo = field::communication::PackInfo<Field>;
-    template <class Field>
-    using PackInfoStreaming =
-        std::conditional_t<std::is_same_v<Field, PdfField>,
-                           typename detail::KernelTrait<FT, AT>::PackInfoPdf,
-                           typename detail::KernelTrait<FT, AT>::PackInfoVec>;
+    using PackInfoStreamingPdf =
+        typename detail::KernelTrait<FT, AT>::PackInfoPdf;
+    using PackInfoStreamingVec =
+        typename detail::KernelTrait<FT, AT>::PackInfoVec;
     template <class Stencil>
     using RegularCommScheme =
         blockforest::communication::UniformBufferedScheme<Stencil>;
@@ -134,14 +133,30 @@ class LBWalberlaImpl : public LBWalberlaBase {
 
 #if defined(__CUDACC__)
   template <typename FT> struct FieldTrait<FT, lbmpy::Arch::GPU> {
+  private:
+    static auto constexpr AT = lbmpy::Arch::GPU;
+    template <class Field>
+    using MemcpyPackInfo = gpu::communication::MemcpyPackInfo<Field>;
+
+  public:
+    template <typename Stencil>
+    class UniformGPUScheme
+        : public gpu::communication::UniformGPUScheme<Stencil> {
+    public:
+      explicit UniformGPUScheme(auto const &bf)
+          : gpu::communication::UniformGPUScheme<Stencil>(
+                bf, /* sendDirectlyFromGPU */ false,
+                /* useLocalCommunication */ false) {}
+    };
     using PdfField = gpu::GPUField<FT>;
     using VectorField = gpu::GPUField<FT>;
-    template <class Field>
-    using PackInfo = gpu::communication::MemcpyPackInfo<Field>;
-    template <class Field>
-    using PackInfoStreaming = gpu::communication::MemcpyPackInfo<Field>;
+    template <class Field> using PackInfo = MemcpyPackInfo<Field>;
+    using PackInfoStreamingPdf =
+        typename detail::KernelTrait<FT, AT>::PackInfoPdf;
+    using PackInfoStreamingVec =
+        typename detail::KernelTrait<FT, AT>::PackInfoVec;
     template <class Stencil>
-    using RegularCommScheme = gpu::communication::UniformGPUScheme<Stencil>;
+    using RegularCommScheme = UniformGPUScheme<Stencil>;
     template <class Stencil>
     using BoundaryCommScheme =
         blockforest::communication::UniformBufferedScheme<Stencil>;
@@ -315,10 +330,6 @@ class LBWalberlaImpl : public LBWalberlaBase {
   template <class Field>
   using PackInfo =
       typename FieldTrait<FloatType, Architecture>::template PackInfo<Field>;
-  template <class Field>
-  using PackInfoStreaming =
-      typename FieldTrait<FloatType,
-                          Architecture>::template PackInfoStreaming<Field>;
 
   // communicators
   std::shared_ptr<BoundaryFullCommunicator> m_boundary_communicator;
@@ -427,20 +438,22 @@ class LBWalberlaImpl : public LBWalberlaBase {
   }
 
   void setup_streaming_communicator() {
-    auto const setup = [this]<typename PdfPackInfo>() {
+    auto const setup = [this]<typename PackInfoPdf, typename PackInfoVec>() {
       auto const &blocks = m_lattice->get_blocks();
       m_pdf_streaming_communicator =
           std::make_shared<PDFStreamingCommunicator>(blocks);
       m_pdf_streaming_communicator->addPackInfo(
-          std::make_shared<PdfPackInfo>(m_pdf_field_id));
+          std::make_shared<PackInfoPdf>(m_pdf_field_id));
       m_pdf_streaming_communicator->addPackInfo(
-          std::make_shared<PackInfoStreaming<VectorField>>(
-              m_last_applied_force_field_id));
+          std::make_shared<PackInfoVec>(m_last_applied_force_field_id));
     };
+    using FieldTrait = FieldTrait<FloatType, Architecture>;
+    using PackInfoPdf = typename FieldTrait::PackInfoStreamingPdf;
+    using PackInfoVec = typename FieldTrait::PackInfoStreamingVec;
     if (m_has_boundaries or (m_collision_model and has_lees_edwards_bc())) {
-      setup.template operator()<PackInfo<PdfField>>();
+      setup.template operator()<PackInfo<PdfField>, PackInfoVec>();
     } else {
-      setup.template operator()<PackInfoStreaming<PdfField>>();
+      setup.template operator()<PackInfoPdf, PackInfoVec>();
     }
   }
 
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
index 434d968d52a..eadeb04c2fd 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/CMakeLists.txt
@@ -54,6 +54,10 @@ if(ESPRESSO_BUILD_WITH_CUDA AND WALBERLA_BUILD_WITH_CUDA)
             StreamSweepSinglePrecisionCUDA.cu
             InitialPDFsSetterDoublePrecisionCUDA.cu
             InitialPDFsSetterSinglePrecisionCUDA.cu
+            PackInfoPdfSinglePrecisionCUDA.cu
+            PackInfoPdfDoublePrecisionCUDA.cu
+            PackInfoVecSinglePrecisionCUDA.cu
+            PackInfoVecDoublePrecisionCUDA.cu
             Dynamic_UBB_double_precisionCUDA.cu
             Dynamic_UBB_single_precisionCUDA.cu)
 endif()
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu
index 8d35b5d929f..0fd77e065cb 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_double_precisionCUDA.cu
@@ -39,7 +39,7 @@ namespace lbm {
 #else
 #pragma push
 #pragma diag_suppress 177 // unused variable
-#endif
+#endif                    // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
 #elif defined(__clang__)
 #if defined(__CUDA__)
 #if defined(__CUDA_ARCH__)
@@ -58,8 +58,8 @@ namespace lbm {
 #pragma clang diagnostic ignored "-Wunused-variable"
 #pragma clang diagnostic ignored "-Wconversion"
 #pragma clang diagnostic ignored "-Wsign-compare"
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
 #elif defined(__GNUC__) or defined(__GNUG__)
 #define RESTRICT __restrict__
 #pragma GCC diagnostic push
@@ -128,7 +128,13 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_double_precisioncuda_
 
 // NOLINTEND(readability-non-const-parameter*)
 
-#if defined(__clang__)
+#if defined(__NVCC__)
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic pop
+#else
+#pragma pop
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
 #if defined(__CUDA__)
 #if defined(__CUDA_ARCH__)
 // clang compiling CUDA code in device mode
@@ -136,16 +142,10 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_double_precisioncuda_
 #else
 // clang compiling CUDA code in host mode
 #pragma clang diagnostic pop
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
 #elif defined(__GNUC__) or defined(__GNUG__)
 #pragma GCC diagnostic pop
-#elif defined(__CUDACC__)
-#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
-#pragma nv_diagnostic pop
-#else
-#pragma pop
-#endif
 #endif
 
 void Dynamic_UBB_double_precisionCUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu
index a046e6f9a46..4ed013d81e4 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/Dynamic_UBB_single_precisionCUDA.cu
@@ -39,7 +39,7 @@ namespace lbm {
 #else
 #pragma push
 #pragma diag_suppress 177 // unused variable
-#endif
+#endif                    // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
 #elif defined(__clang__)
 #if defined(__CUDA__)
 #if defined(__CUDA_ARCH__)
@@ -58,8 +58,8 @@ namespace lbm {
 #pragma clang diagnostic ignored "-Wunused-variable"
 #pragma clang diagnostic ignored "-Wconversion"
 #pragma clang diagnostic ignored "-Wsign-compare"
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
 #elif defined(__GNUC__) or defined(__GNUG__)
 #define RESTRICT __restrict__
 #pragma GCC diagnostic push
@@ -128,7 +128,13 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_
 
 // NOLINTEND(readability-non-const-parameter*)
 
-#if defined(__clang__)
+#if defined(__NVCC__)
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic pop
+#else
+#pragma pop
+#endif // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
 #if defined(__CUDA__)
 #if defined(__CUDA_ARCH__)
 // clang compiling CUDA code in device mode
@@ -136,16 +142,10 @@ static FUNC_PREFIX __launch_bounds__(256) void dynamic_ubb_single_precisioncuda_
 #else
 // clang compiling CUDA code in host mode
 #pragma clang diagnostic pop
-#endif
-#endif
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
 #elif defined(__GNUC__) or defined(__GNUG__)
 #pragma GCC diagnostic pop
-#elif defined(__CUDACC__)
-#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
-#pragma nv_diagnostic pop
-#else
-#pragma pop
-#endif
 #endif
 
 void Dynamic_UBB_single_precisionCUDA::run_impl(IBlock *block, IndexVectors::Type type, gpuStream_t stream) {
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp
index 1ab45417dcb..6503551664c 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecision.cpp
@@ -24,8 +24,6 @@
 #include "core/cell/CellInterval.h"
 #include "stencil/Directions.h"
 
-#include <cstddef>
-
 #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu
new file mode 100644
index 00000000000..5636dad6a32
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.cu
@@ -0,0 +1,1423 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file PackInfoPdfDoublePrecisionCUDA.cu
+//! \author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoPdfDoublePrecisionCUDA.h"
+
+#define FUNC_PREFIX __global__ // functions declared with FUNC_PREFIX are CUDA __global__ kernels
+
+#if defined(__NVCC__) // nvcc: define RESTRICT and silence diagnostic 177, saving the previous diagnostic state
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif                    // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__) // clang: same idea, expressed through -Wunused-variable
+#if defined(__CUDA__) // NOTE(review): clang without __CUDA__ leaves RESTRICT undefined and pushes no diagnostic state - confirm this file is only built as CUDA
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else // the host-mode branch below is textually identical to the device-mode branch above
+// clang compiling CUDA code in host mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__) // GCC-compatible compilers
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER) // MSVC: only RESTRICT is defined here, no diagnostic state is pushed
+#define RESTRICT __restrict
+#else // any other compiler: RESTRICT expands to nothing
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW { // kernel pack_SW: gather entry 9 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_SW
+
+namespace internal_pack_BW { // kernel pack_BW: gather entry 17 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_BW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_BW
+
+namespace internal_pack_W { // kernel pack_W: gather entries 13, 17, 3, 7, 9 of the 4th pdfs axis for each cell into a contiguous buffer (5 values per cell)
+static FUNC_PREFIX __launch_bounds__(256) void pack_W(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; // buffer slot 0 of this cell <- pdfs entry 13
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; // buffer slot 1 of this cell <- pdfs entry 17
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3]; // buffer slot 2 of this cell <- pdfs entry 3
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; // buffer slot 3 of this cell <- pdfs entry 7
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; // buffer slot 4 of this cell <- pdfs entry 9
+  }
+}
+} // namespace internal_pack_W
+
+namespace internal_pack_TW { // kernel pack_TW: gather entry 13 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_TW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_TW
+
+namespace internal_pack_NW { // kernel pack_NW: gather entry 7 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_NW(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_NW
+
+namespace internal_pack_BS { // kernel pack_BS: gather entry 16 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_BS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_BS
+
+namespace internal_pack_S { // kernel pack_S: gather entries 10, 12, 16, 2, 9 of the 4th pdfs axis for each cell into a contiguous buffer (5 values per cell)
+static FUNC_PREFIX __launch_bounds__(256) void pack_S(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; // buffer slot 0 of this cell <- pdfs entry 10
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; // buffer slot 1 of this cell <- pdfs entry 12
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; // buffer slot 2 of this cell <- pdfs entry 16
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3]; // buffer slot 3 of this cell <- pdfs entry 2
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3]; // buffer slot 4 of this cell <- pdfs entry 9
+  }
+}
+} // namespace internal_pack_S
+
+namespace internal_pack_TS { // kernel pack_TS: gather entry 12 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_TS(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B { // kernel pack_B: gather entries 15, 16, 17, 18, 6 of the 4th pdfs axis for each cell into a contiguous buffer (5 values per cell)
+static FUNC_PREFIX __launch_bounds__(256) void pack_B(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; // buffer slot 0 of this cell <- pdfs entry 15
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3]; // buffer slot 1 of this cell <- pdfs entry 16
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3]; // buffer slot 2 of this cell <- pdfs entry 17
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; // buffer slot 3 of this cell <- pdfs entry 18
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 6 * _stride_pdfs_3]; // buffer slot 4 of this cell <- pdfs entry 6
+  }
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T { // kernel pack_T: gather entries 11, 12, 13, 14, 5 of the 4th pdfs axis for each cell into a contiguous buffer (5 values per cell)
+static FUNC_PREFIX __launch_bounds__(256) void pack_T(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; // buffer slot 0 of this cell <- pdfs entry 11
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3]; // buffer slot 1 of this cell <- pdfs entry 12
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3]; // buffer slot 2 of this cell <- pdfs entry 13
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; // buffer slot 3 of this cell <- pdfs entry 14
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 5 * _stride_pdfs_3]; // buffer slot 4 of this cell <- pdfs entry 5
+  }
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN { // kernel pack_BN: gather entry 15 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_BN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N { // kernel pack_N: gather entries 1, 11, 15, 7, 8 of the 4th pdfs axis for each cell into a contiguous buffer (5 values per cell)
+static FUNC_PREFIX __launch_bounds__(256) void pack_N(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + _stride_pdfs_3]; // buffer slot 0 of this cell <- pdfs entry 1 (stride factor of 1 is implicit)
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; // buffer slot 1 of this cell <- pdfs entry 11
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 15 * _stride_pdfs_3]; // buffer slot 2 of this cell <- pdfs entry 15
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3]; // buffer slot 3 of this cell <- pdfs entry 7
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; // buffer slot 4 of this cell <- pdfs entry 8
+  }
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN { // kernel pack_TN: gather entry 11 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_TN(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_TN
+
+namespace internal_pack_SE { // kernel pack_SE: gather entry 10 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_SE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_SE
+
+namespace internal_pack_BE { // kernel pack_BE: gather entry 18 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_BE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_BE
+
+namespace internal_pack_E { // kernel pack_E: gather entries 10, 14, 18, 4, 8 of the 4th pdfs axis for each cell into a contiguous buffer (5 values per cell)
+static FUNC_PREFIX __launch_bounds__(256) void pack_E(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3]; // buffer slot 0 of this cell <- pdfs entry 10
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; // buffer slot 1 of this cell <- pdfs entry 14
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3]; // buffer slot 2 of this cell <- pdfs entry 18
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3]; // buffer slot 3 of this cell <- pdfs entry 4
+    _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; // buffer slot 4 of this cell <- pdfs entry 8
+  }
+}
+} // namespace internal_pack_E
+
+namespace internal_pack_TE { // kernel pack_TE: gather entry 14 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_TE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_TE
+
+namespace internal_pack_NE { // kernel pack_NE: gather entry 8 of the 4th pdfs axis for each cell into a contiguous buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0] = _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3]; // buffer is dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_pack_NE
+
+namespace internal_unpack_SW { // kernel unpack_SW: scatter a contiguous buffer back into entry 9 of the 4th pdfs axis for each cell
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer is read as dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_BW { // kernel unpack_BW: scatter a contiguous buffer back into entry 17 of the 4th pdfs axis for each cell
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer is read as dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W { // kernel unpack_W: scatter a contiguous buffer (5 values per cell) back into entries 13, 17, 3, 7, 9 of the 4th pdfs axis
+static FUNC_PREFIX __launch_bounds__(256) void unpack_W(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; // pdfs entry 13 <- buffer slot 0 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 17 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; // pdfs entry 17 <- buffer slot 1 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 3 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; // pdfs entry 3 <- buffer slot 2 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; // pdfs entry 7 <- buffer slot 3 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; // pdfs entry 9 <- buffer slot 4 of this cell
+  }
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW { // kernel unpack_TW: scatter a contiguous buffer back into entry 13 of the 4th pdfs axis for each cell
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 13 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer is read as dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_NW { // kernel unpack_NW: scatter a contiguous buffer back into entry 7 of the 4th pdfs axis for each cell
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NW(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 7 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer is read as dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_BS { // kernel unpack_BS: scatter a contiguous buffer back into entry 16 of the 4th pdfs axis for each cell
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer is read as dense with axis 0 varying fastest
+  }
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S { // kernel unpack_S: scatter a contiguous buffer (5 values per cell) back into entries 10, 12, 16, 2, 9 of the 4th pdfs axis
+static FUNC_PREFIX __launch_bounds__(256) void unpack_S(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // guard: threads whose indices fall outside the size box do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index on axis 0, from the x thread coordinate
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index on axis 1, from the y thread coordinate
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index on axis 2, from the z thread coordinate
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; // pdfs entry 10 <- buffer slot 0 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 12 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1]; // pdfs entry 12 <- buffer slot 1 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 16 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2]; // pdfs entry 16 <- buffer slot 2 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 2 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3]; // pdfs entry 2 <- buffer slot 3 of this cell
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 9 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4]; // pdfs entry 9 <- buffer slot 4 of this cell
+  }
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS { // kernel: copy one buffer value per cell into index 12 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TS(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 12 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B { // kernel: copy five consecutive buffer values per cell into indices 15, 16, 17, 18, 6 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_B(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in the strided field
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's 5 buffer entries
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 18 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 6 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T { // kernel: copy five consecutive buffer values per cell into indices 11, 12, 13, 14, 5 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_T(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in the strided field
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's 5 buffer entries
+  _data_pdfs[cell + 11 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 14 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 5 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN { // kernel: copy one buffer value per cell into index 15 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 15 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N { // kernel: copy five consecutive buffer values per cell into indices 1, 11, 15, 7, 8 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_N(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in the strided field
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's 5 buffer entries
+  _data_pdfs[cell + _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 11 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN { // kernel: copy one buffer value per cell into index 11 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TN(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 11 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TN
+
+namespace internal_unpack_SE { // kernel: copy one buffer value per cell into index 10 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 10 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_SE
+
+namespace internal_unpack_BE { // kernel: copy one buffer value per cell into index 18 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 18 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BE
+
+namespace internal_unpack_E { // kernel: copy five consecutive buffer values per cell into indices 10, 14, 18, 4, 8 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_E(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in the strided field
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's 5 buffer entries
+  _data_pdfs[cell + 10 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 14 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 18 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 4 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_E
+
+namespace internal_unpack_TE { // kernel: copy one buffer value per cell into index 14 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 14 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TE
+
+namespace internal_unpack_NE { // kernel: copy one buffer value per cell into index 8 (stride_3 axis) of pdfs
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x; // each thread handles one (ix, iy, iz) cell
+  const int64_t iy = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // surplus threads past the extents do nothing
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // buffer is read densely: x fastest, then y, then z
+  _data_pdfs[_stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz + 8 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_NE
+
+void PackInfoPdfDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+
+  auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);
+
+  CellInterval ci;
+  pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+  switch (dir) {
+  case stencil::SW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BW: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::W: { // direction W: build the kernel arguments from ci and pdfs, then launch pack_W
+    double *RESTRICT _data_buffer = buffer; // handed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // these three asserts: ci's minimum corner is >= -nrOfGhostLayers() on every axis
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with fourth index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // each extent is asserted to be <= the field size including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extents (_size_pdfs_0..2) equal the x/y/z sizes of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z strides are read from the field and forwarded unchanged
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fourth stride: the field's f stride
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero when size_0 or size_1 is 0 - confirm ci is never empty
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i / block_i, plus 1 when size_i % block_i != 0, where block_i repeats the matching _block extent
+    internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on 'stream'; third launch parameter is 0
+    break;
+  }
+
+  case stencil::TW: { // direction TW: build the kernel arguments from ci and pdfs, then launch pack_TW
+    double *RESTRICT _data_buffer = buffer; // handed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // these three asserts: ci's minimum corner is >= -nrOfGhostLayers() on every axis
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with fourth index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // each extent is asserted to be <= the field size including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extents (_size_pdfs_0..2) equal the x/y/z sizes of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z strides are read from the field and forwarded unchanged
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fourth stride: the field's f stride
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero when size_0 or size_1 is 0 - confirm ci is never empty
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i / block_i, plus 1 when size_i % block_i != 0, where block_i repeats the matching _block extent
+    internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on 'stream'; third launch parameter is 0
+    break;
+  }
+
+  case stencil::NW: { // direction NW: build the kernel arguments from ci and pdfs, then launch pack_NW
+    double *RESTRICT _data_buffer = buffer; // handed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // these three asserts: ci's minimum corner is >= -nrOfGhostLayers() on every axis
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with fourth index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // each extent is asserted to be <= the field size including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extents (_size_pdfs_0..2) equal the x/y/z sizes of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z strides are read from the field and forwarded unchanged
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fourth stride: the field's f stride
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero when size_0 or size_1 is 0 - confirm ci is never empty
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i / block_i, plus 1 when size_i % block_i != 0, where block_i repeats the matching _block extent
+    internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on 'stream'; third launch parameter is 0
+    break;
+  }
+
+  case stencil::BS: { // direction BS: build the kernel arguments from ci and pdfs, then launch pack_BS
+    double *RESTRICT _data_buffer = buffer; // handed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // these three asserts: ci's minimum corner is >= -nrOfGhostLayers() on every axis
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer taken at ci's minimum corner with fourth index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // each extent is asserted to be <= the field size including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extents (_size_pdfs_0..2) equal the x/y/z sizes of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z strides are read from the field and forwarded unchanged
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fourth stride: the field's f stride
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // threads per block (integer division): x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero when size_0 or size_1 is 0 - confirm ci is never empty
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // blocks per axis: size_i / block_i, plus 1 when size_i % block_i != 0, where block_i repeats the matching _block extent
+    internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launched on 'stream'; third launch parameter is 0
+    break;
+  }
+
+  case stencil::S: { // direction S: set up and launch the pack_S kernel for the cell interval ci
+    double *RESTRICT _data_buffer = buffer; // alias of `buffer`; passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // expects ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the element at ci's minimum corner, last index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // expects ci's x extent to fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (the "+ 0" is a no-op)
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same fit check for y
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same fit check for z
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() times 1, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with nx,ny,nz = _size_pdfs_0.._2 and integer division: bx = min(128, nx); by = min(1024, ny, 2*(128/bx)); bz = min(64, nz, 256/(bx*min(ny, 2*(128/bx)))). NOTE(review): nx == 0 makes bx == 0 and 128/bx divides by zero - confirm callers never pass an empty interval
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceiling division of _size_pdfs_k by the matching _block extent (exact quotient when divisible, quotient + 1 otherwise)
+    internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break; // leave the switch; no fall-through into the next direction
+  }
+
+  case stencil::TS: { // direction TS: set up and launch the pack_TS kernel for the cell interval ci
+    double *RESTRICT _data_buffer = buffer; // alias of `buffer`; passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // expects ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the element at ci's minimum corner, last index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // expects ci's x extent to fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (the "+ 0" is a no-op)
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same fit check for y
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same fit check for z
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() times 1, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with nx,ny,nz = _size_pdfs_0.._2 and integer division: bx = min(128, nx); by = min(1024, ny, 2*(128/bx)); bz = min(64, nz, 256/(bx*min(ny, 2*(128/bx)))). NOTE(review): nx == 0 makes bx == 0 and 128/bx divides by zero - confirm callers never pass an empty interval
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceiling division of _size_pdfs_k by the matching _block extent (exact quotient when divisible, quotient + 1 otherwise)
+    internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break; // leave the switch; no fall-through into the next direction
+  }
+
+  case stencil::B: { // direction B: set up and launch the pack_B kernel for the cell interval ci
+    double *RESTRICT _data_buffer = buffer; // alias of `buffer`; passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // expects ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the element at ci's minimum corner, last index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // expects ci's x extent to fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (the "+ 0" is a no-op)
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same fit check for y
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same fit check for z
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() times 1, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with nx,ny,nz = _size_pdfs_0.._2 and integer division: bx = min(128, nx); by = min(1024, ny, 2*(128/bx)); bz = min(64, nz, 256/(bx*min(ny, 2*(128/bx)))). NOTE(review): nx == 0 makes bx == 0 and 128/bx divides by zero - confirm callers never pass an empty interval
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceiling division of _size_pdfs_k by the matching _block extent (exact quotient when divisible, quotient + 1 otherwise)
+    internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break; // leave the switch; no fall-through into the next direction
+  }
+
+  case stencil::T: { // direction T: set up and launch the pack_T kernel for the cell interval ci
+    double *RESTRICT _data_buffer = buffer; // alias of `buffer`; passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // expects ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for y
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // same lower-bound check for z
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the element at ci's minimum corner, last index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // expects ci's x extent to fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci (the "+ 0" is a no-op)
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // same fit check for y
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // same fit check for z
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // y stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // z stride of the field, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // fStride() times 1, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, by, bz) with nx,ny,nz = _size_pdfs_0.._2 and integer division: bx = min(128, nx); by = min(1024, ny, 2*(128/bx)); bz = min(64, nz, 256/(bx*min(ny, 2*(128/bx)))). NOTE(review): nx == 0 makes bx == 0 and 128/bx divides by zero - confirm callers never pass an empty interval
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceiling division of _size_pdfs_k by the matching _block extent (exact quotient when divisible, quotient + 1 otherwise)
+    internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break; // leave the switch; no fall-through into the next direction
+  }
+
+  case stencil::BN: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::N: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TN: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::SE: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BE: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_BE::pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::E: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TE: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NE: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+void PackInfoPdfDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+
+  auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID);
+
+  CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false);
+  auto communciationDirection = stencil::inverseDir[dir];
+
+  switch (communciationDirection) {
+  case stencil::SW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::W: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NW: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BS: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::S: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TS: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::B: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::T: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BN: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::N: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TN: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::SE: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BE: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::E: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TE: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NE: {
+    double *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    double *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+uint_t PackInfoPdfDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+  auto pdfs = block->getData<gpu::GPUField<double>>(pdfsID); // field stored under pdfsID on this block
+
+  CellInterval ghostRegion;
+  pdfs->getGhostRegion(dir, ghostRegion, 1, false); // fills ghostRegion for direction dir
+
+  uint_t valuesPerCell = 0; // number of doubles exchanged per cell
+
+  switch (dir) {
+  case stencil::W: // directions that carry five values per cell
+    valuesPerCell = 5;
+    break;
+
+  case stencil::E:
+    valuesPerCell = 5;
+    break;
+
+  case stencil::S:
+    valuesPerCell = 5;
+    break;
+
+  case stencil::N:
+    valuesPerCell = 5;
+    break;
+
+  case stencil::B:
+    valuesPerCell = 5;
+    break;
+
+  case stencil::T:
+    valuesPerCell = 5;
+    break;
+
+  case stencil::SW: // directions that carry a single value per cell
+    valuesPerCell = 1;
+    break;
+
+  case stencil::SE:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::NW:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::NE:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::BW:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::BE:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::TW:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::TE:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::BS:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::BN:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::TS:
+    valuesPerCell = 1;
+    break;
+
+  case stencil::TN:
+    valuesPerCell = 1;
+    break;
+
+  default: // any other direction contributes no data
+    valuesPerCell = 0;
+  }
+  return ghostRegion.numCells() * valuesPerCell * sizeof(double); // size in bytes
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h
new file mode 100644
index 00000000000..256f03be494
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfDoublePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfDoublePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+class PackInfoPdfDoublePrecisionCUDA
+    : public ::walberla::gpu::GeneratedGPUPackInfo { // pack/unpack/size are only declared here
+public:
+  PackInfoPdfDoublePrecisionCUDA(BlockDataID pdfsID_) : pdfsID(pdfsID_) {} // stores the block-data id
+  virtual ~PackInfoPdfDoublePrecisionCUDA() {}
+
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+            gpuStream_t stream) override;
+  void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */,
+                        IBlock * /* receiver */,
+                        gpuStream_t /* stream */) override { // unsupported: always aborts
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn off local "
+                   "communication in the Communication class")
+  }
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+              gpuStream_t stream) override;
+  uint_t size(stencil::Direction dir, IBlock *block) override; // returns a byte count
+
+private:
+  BlockDataID pdfsID; // id passed to the constructor
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp
index e55017ab211..b1ac86db8e2 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecision.cpp
@@ -24,8 +24,6 @@
 #include "core/cell/CellInterval.h"
 #include "stencil/Directions.h"
 
-#include <cstddef>
-
 #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu
new file mode 100644
index 00000000000..51b2b40cd92
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.cu
@@ -0,0 +1,1423 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfSinglePrecisionCUDA.cu
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoPdfSinglePrecisionCUDA.h"
+
+#define FUNC_PREFIX __global__ // qualifier used by the kernel definitions below
+
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push // NOTE(review): matching pop presumably follows the kernels - TODO confirm
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif                    // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
+#if defined(__CUDA__) // NOTE(review): if clang is not in CUDA mode, this chain leaves RESTRICT undefined
+#if defined(__CUDA_ARCH__) // both branches below currently contain the same directives
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+#define RESTRICT // no known restrict keyword: expands to nothing
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW {
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  // Each thread copies one value (offset 9 along the fourth field axis) into the dense buffer.
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread falls outside the interval
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * z + y) + x] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_SW
+
+namespace internal_pack_BW {
+static FUNC_PREFIX __launch_bounds__(256) void pack_BW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  // Each thread copies one value (offset 17 along the fourth field axis) into the dense buffer.
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread falls outside the interval
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * z + y) + x] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 17 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BW
+
+namespace internal_pack_W {
+static FUNC_PREFIX __launch_bounds__(256) void pack_W(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread falls outside the interval
+  const int64_t slot = 5 * (_size_pdfs_0 * (_size_pdfs_1 * z + y) + x); // five consecutive buffer entries per cell
+  _data_buffer[slot] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 13 * _stride_pdfs_3];
+  _data_buffer[slot + 1] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 17 * _stride_pdfs_3];
+  _data_buffer[slot + 2] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 3 * _stride_pdfs_3];
+  _data_buffer[slot + 3] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 7 * _stride_pdfs_3];
+  _data_buffer[slot + 4] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_W
+
+namespace internal_pack_TW {
+static FUNC_PREFIX __launch_bounds__(256) void pack_TW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  // Each thread copies one value (offset 13 along the fourth field axis) into the dense buffer.
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread falls outside the interval
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * z + y) + x] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 13 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TW
+
+namespace internal_pack_NW {
+static FUNC_PREFIX __launch_bounds__(256) void pack_NW(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  // Each thread copies one value (offset 7 along the fourth field axis) into the dense buffer.
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread falls outside the interval
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * z + y) + x] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 7 * _stride_pdfs_3];
+}
+} // namespace internal_pack_NW
+
+namespace internal_pack_BS {
+static FUNC_PREFIX __launch_bounds__(256) void pack_BS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  // Each thread copies one value (offset 16 along the fourth field axis) into the dense buffer.
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_pdfs_0 || y >= _size_pdfs_1 || z >= _size_pdfs_2) return; // thread falls outside the interval
+  _data_buffer[_size_pdfs_0 * (_size_pdfs_1 * z + y) + x] = _data_pdfs[_stride_pdfs_0 * x + _stride_pdfs_1 * y + _stride_pdfs_2 * z + 16 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BS
+
+namespace internal_pack_S {
+// Pack kernel: each in-range thread copies five _data_pdfs entries of its cell (indices 10, 12, 16, 2, 9 along the _stride_pdfs_3 axis) into five consecutive _data_buffer slots.
+static FUNC_PREFIX __launch_bounds__(256) void pack_S(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_buffer[slot] = _data_pdfs[cell + 10 * _stride_pdfs_3];
+  _data_buffer[slot + 1] = _data_pdfs[cell + 12 * _stride_pdfs_3];
+  _data_buffer[slot + 2] = _data_pdfs[cell + 16 * _stride_pdfs_3];
+  _data_buffer[slot + 3] = _data_pdfs[cell + 2 * _stride_pdfs_3];
+  _data_buffer[slot + 4] = _data_pdfs[cell + 9 * _stride_pdfs_3];
+}
+} // namespace internal_pack_S
+
+namespace internal_pack_TS {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 12 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TS(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 12 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TS
+
+namespace internal_pack_B {
+// Pack kernel: each in-range thread copies five _data_pdfs entries of its cell (indices 15, 16, 17, 18, 6 along the _stride_pdfs_3 axis) into five consecutive _data_buffer slots.
+static FUNC_PREFIX __launch_bounds__(256) void pack_B(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_buffer[slot] = _data_pdfs[cell + 15 * _stride_pdfs_3];
+  _data_buffer[slot + 1] = _data_pdfs[cell + 16 * _stride_pdfs_3];
+  _data_buffer[slot + 2] = _data_pdfs[cell + 17 * _stride_pdfs_3];
+  _data_buffer[slot + 3] = _data_pdfs[cell + 18 * _stride_pdfs_3];
+  _data_buffer[slot + 4] = _data_pdfs[cell + 6 * _stride_pdfs_3];
+}
+} // namespace internal_pack_B
+
+namespace internal_pack_T {
+// Pack kernel: each in-range thread copies five _data_pdfs entries of its cell (indices 11, 12, 13, 14, 5 along the _stride_pdfs_3 axis) into five consecutive _data_buffer slots.
+static FUNC_PREFIX __launch_bounds__(256) void pack_T(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_buffer[slot] = _data_pdfs[cell + 11 * _stride_pdfs_3];
+  _data_buffer[slot + 1] = _data_pdfs[cell + 12 * _stride_pdfs_3];
+  _data_buffer[slot + 2] = _data_pdfs[cell + 13 * _stride_pdfs_3];
+  _data_buffer[slot + 3] = _data_pdfs[cell + 14 * _stride_pdfs_3];
+  _data_buffer[slot + 4] = _data_pdfs[cell + 5 * _stride_pdfs_3];
+}
+} // namespace internal_pack_T
+
+namespace internal_pack_BN {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 15 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 15 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BN
+
+namespace internal_pack_N {
+// Pack kernel: each in-range thread copies five _data_pdfs entries of its cell (indices 1, 11, 15, 7, 8 along the _stride_pdfs_3 axis) into five consecutive _data_buffer slots.
+static FUNC_PREFIX __launch_bounds__(256) void pack_N(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_buffer[slot] = _data_pdfs[cell + _stride_pdfs_3];
+  _data_buffer[slot + 1] = _data_pdfs[cell + 11 * _stride_pdfs_3];
+  _data_buffer[slot + 2] = _data_pdfs[cell + 15 * _stride_pdfs_3];
+  _data_buffer[slot + 3] = _data_pdfs[cell + 7 * _stride_pdfs_3];
+  _data_buffer[slot + 4] = _data_pdfs[cell + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_N
+
+namespace internal_pack_TN {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 11 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TN(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 11 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TN
+
+namespace internal_pack_SE {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 10 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 10 * _stride_pdfs_3];
+}
+} // namespace internal_pack_SE
+
+namespace internal_pack_BE {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 18 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_BE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 18 * _stride_pdfs_3];
+}
+} // namespace internal_pack_BE
+
+namespace internal_pack_E {
+// Pack kernel: each in-range thread copies five _data_pdfs entries of its cell (indices 10, 14, 18, 4, 8 along the _stride_pdfs_3 axis) into five consecutive _data_buffer slots.
+static FUNC_PREFIX __launch_bounds__(256) void pack_E(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_buffer[slot] = _data_pdfs[cell + 10 * _stride_pdfs_3];
+  _data_buffer[slot + 1] = _data_pdfs[cell + 14 * _stride_pdfs_3];
+  _data_buffer[slot + 2] = _data_pdfs[cell + 18 * _stride_pdfs_3];
+  _data_buffer[slot + 3] = _data_pdfs[cell + 4 * _stride_pdfs_3];
+  _data_buffer[slot + 4] = _data_pdfs[cell + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_E
+
+namespace internal_pack_TE {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 14 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_TE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 14 * _stride_pdfs_3];
+}
+} // namespace internal_pack_TE
+
+namespace internal_pack_NE {
+// Pack kernel: each in-range thread copies the _data_pdfs entry at index 8 along the _stride_pdfs_3 axis of its cell into one _data_buffer slot.
+static FUNC_PREFIX __launch_bounds__(256) void pack_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_buffer[slot] = _data_pdfs[cell + 8 * _stride_pdfs_3];
+}
+} // namespace internal_pack_NE
+
+namespace internal_unpack_SW {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 9 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 9 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_SW
+
+namespace internal_unpack_BW {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 17 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BW
+
+namespace internal_unpack_W {
+// Unpack kernel: each in-range thread copies five consecutive _data_buffer slots into the _data_pdfs entries of its cell at indices 13, 17, 3, 7, 9 along the _stride_pdfs_3 axis.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_W(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 3 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 9 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_W
+
+namespace internal_unpack_TW {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 13 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TW
+
+namespace internal_unpack_NW {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 7 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NW(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_NW
+
+namespace internal_unpack_BS {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 16 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BS
+
+namespace internal_unpack_S {
+// Unpack kernel: each in-range thread copies five consecutive _data_buffer slots into the _data_pdfs entries of its cell at indices 10, 12, 16, 2, 9 along the _stride_pdfs_3 axis.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_S(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_pdfs[cell + 10 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 2 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 9 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_S
+
+namespace internal_unpack_TS {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 12 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TS(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_TS
+
+namespace internal_unpack_B {
+// Unpack kernel: each in-range thread copies five consecutive _data_buffer slots into the _data_pdfs entries of its cell at indices 15, 16, 17, 18, 6 along the _stride_pdfs_3 axis.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_B(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 16 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 17 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 18 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 6 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_B
+
+namespace internal_unpack_T {
+// Unpack kernel: each in-range thread copies five consecutive _data_buffer slots into the _data_pdfs entries of its cell at indices 11, 12, 13, 14, 5 along the _stride_pdfs_3 axis.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_T(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_pdfs[cell + 11 * _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 12 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 13 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 14 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 5 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_T
+
+namespace internal_unpack_BN {
+// Unpack kernel: each in-range thread copies one _data_buffer slot into the _data_pdfs entry at index 15 along the _stride_pdfs_3 axis of its cell.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = _size_pdfs_0 * _size_pdfs_1 * iz + _size_pdfs_0 * iy + ix; // offset of this cell in _data_buffer
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot];
+}
+} // namespace internal_unpack_BN
+
+namespace internal_unpack_N {
+// Unpack kernel: each in-range thread copies five consecutive _data_buffer slots into the _data_pdfs entries of its cell at indices 1, 11, 15, 7, 8 along the _stride_pdfs_3 axis.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_N(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) {
+  const int64_t ix = blockDim.x * blockIdx.x + threadIdx.x, iy = blockDim.y * blockIdx.y + threadIdx.y, iz = blockDim.z * blockIdx.z + threadIdx.z;
+  if (ix >= _size_pdfs_0 || iy >= _size_pdfs_1 || iz >= _size_pdfs_2) return; // thread lies outside the iteration extent
+  const int64_t cell = _stride_pdfs_0 * ix + _stride_pdfs_1 * iy + _stride_pdfs_2 * iz; // offset of this cell in _data_pdfs
+  const int64_t slot = 5 * _size_pdfs_0 * _size_pdfs_1 * iz + 5 * _size_pdfs_0 * iy + 5 * ix; // first of this cell's five _data_buffer slots
+  _data_pdfs[cell + _stride_pdfs_3] = _data_buffer[slot];
+  _data_pdfs[cell + 11 * _stride_pdfs_3] = _data_buffer[slot + 1];
+  _data_pdfs[cell + 15 * _stride_pdfs_3] = _data_buffer[slot + 2];
+  _data_pdfs[cell + 7 * _stride_pdfs_3] = _data_buffer[slot + 3];
+  _data_pdfs[cell + 8 * _stride_pdfs_3] = _data_buffer[slot + 4];
+}
+} // namespace internal_unpack_N
+
+namespace internal_unpack_TN { // kernel copying one packed buffer value per cell into pdfs component 11 (offset 11 * _stride_pdfs_3)
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TN(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { // launch bound: at most 256 threads per block
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // bounds guard: threads whose index lies outside the three size extents do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index along dimension 0, one thread per cell
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index along dimension 1
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index along dimension 2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 11 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer layout: one value per cell, cells ordered with ctr_0 fastest, then ctr_1, then ctr_2
+  }
+}
+} // namespace internal_unpack_TN
+
+namespace internal_unpack_SE { // kernel copying one packed buffer value per cell into pdfs component 10 (offset 10 * _stride_pdfs_3)
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { // launch bound: at most 256 threads per block
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // bounds guard: threads whose index lies outside the three size extents do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index along dimension 0, one thread per cell
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index along dimension 1
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index along dimension 2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer layout: one value per cell, cells ordered with ctr_0 fastest, then ctr_1, then ctr_2
+  }
+}
+} // namespace internal_unpack_SE
+
+namespace internal_unpack_BE { // kernel copying one packed buffer value per cell into pdfs component 18 (offset 18 * _stride_pdfs_3)
+static FUNC_PREFIX __launch_bounds__(256) void unpack_BE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { // launch bound: at most 256 threads per block
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // bounds guard: threads whose index lies outside the three size extents do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index along dimension 0, one thread per cell
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index along dimension 1
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index along dimension 2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer layout: one value per cell, cells ordered with ctr_0 fastest, then ctr_1, then ctr_2
+  }
+}
+} // namespace internal_unpack_BE
+
+namespace internal_unpack_E { // kernel copying five packed buffer values per cell into pdfs components 10, 14, 18, 4 and 8 (offsets in units of _stride_pdfs_3)
+static FUNC_PREFIX __launch_bounds__(256) void unpack_E(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { // launch bound: at most 256 threads per block
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // bounds guard: threads whose index lies outside the three size extents do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index along dimension 0, one thread per cell
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index along dimension 1
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index along dimension 2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 10 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0]; // buffer layout: 5 consecutive values per cell, cells ordered with ctr_0 fastest, then ctr_1, then ctr_2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 1];
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 18 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 2];
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 4 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 3];
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[5 * _size_pdfs_0 * _size_pdfs_1 * ctr_2 + 5 * _size_pdfs_0 * ctr_1 + 5 * ctr_0 + 4];
+  }
+}
+} // namespace internal_unpack_E
+
+namespace internal_unpack_TE { // kernel copying one packed buffer value per cell into pdfs component 14 (offset 14 * _stride_pdfs_3)
+static FUNC_PREFIX __launch_bounds__(256) void unpack_TE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { // launch bound: at most 256 threads per block
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // bounds guard: threads whose index lies outside the three size extents do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index along dimension 0, one thread per cell
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index along dimension 1
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index along dimension 2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 14 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer layout: one value per cell, cells ordered with ctr_0 fastest, then ctr_1, then ctr_2
+  }
+}
+} // namespace internal_unpack_TE
+
+namespace internal_unpack_NE { // kernel copying one packed buffer value per cell into pdfs component 8 (offset 8 * _stride_pdfs_3)
+static FUNC_PREFIX __launch_bounds__(256) void unpack_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_pdfs, int64_t const _size_pdfs_0, int64_t const _size_pdfs_1, int64_t const _size_pdfs_2, int64_t const _stride_pdfs_0, int64_t const _stride_pdfs_1, int64_t const _stride_pdfs_2, int64_t const _stride_pdfs_3) { // launch bound: at most 256 threads per block
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_pdfs_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_pdfs_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_pdfs_2) { // bounds guard: threads whose index lies outside the three size extents do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell index along dimension 0, one thread per cell
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y; // cell index along dimension 1
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z; // cell index along dimension 2
+    _data_pdfs[_stride_pdfs_0 * ctr_0 + _stride_pdfs_1 * ctr_1 + _stride_pdfs_2 * ctr_2 + 8 * _stride_pdfs_3] = _data_buffer[_size_pdfs_0 * _size_pdfs_1 * ctr_2 + _size_pdfs_0 * ctr_1 + ctr_0]; // buffer layout: one value per cell, cells ordered with ctr_0 fastest, then ctr_1, then ctr_2
+  }
+}
+} // namespace internal_unpack_NE
+
+void PackInfoPdfSinglePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+  auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
+
+  CellInterval ci;
+  pdfs->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+  switch (dir) {
+  case stencil::SW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_SW::pack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_BW::pack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::W: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_W::pack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_TW::pack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NW: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_NW::pack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BS: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_BS::pack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::S: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_S::pack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TS: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_TS::pack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::B: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_B::pack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::T: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_T::pack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BN: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_BN::pack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::N: { // Branch for direction stencil::N: derive the launch geometry from the cell interval `ci` and launch the pack_N kernel.
+    float *RESTRICT _data_buffer = buffer; // buffer pointer, passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.yMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.zMin() >= -(number of ghost layers)
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the field entry at the interval's minimum corner, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // interval x extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // iteration extent in x
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // interval y extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // iteration extent in y
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // interval z extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // iteration extent in z
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // field stride along x, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // field stride along y, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // field stride along z, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // field stride along the f index, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, min(1024, by), min(64, min(sz, 256 / (bx * by)))) with bx = min(128, sx), by = min(sy, 2 * (128 / bx)), s* = _size_pdfs_*; NOTE(review): an empty interval (sx == 0) gives bx == 0 and a division by zero here -- presumably callers never pass empty intervals; confirm
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size / block dimension), spelled as (size % b == 0 ? size / b : size / b + 1)
+    internal_pack_N::pack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::TN: { // Branch for direction stencil::TN: derive the launch geometry from the cell interval `ci` and launch the pack_TN kernel.
+    float *RESTRICT _data_buffer = buffer; // buffer pointer, passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.yMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.zMin() >= -(number of ghost layers)
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the field entry at the interval's minimum corner, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // interval x extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // iteration extent in x
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // interval y extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // iteration extent in y
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // interval z extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // iteration extent in z
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // field stride along x, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // field stride along y, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // field stride along z, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // field stride along the f index, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, min(1024, by), min(64, min(sz, 256 / (bx * by)))) with bx = min(128, sx), by = min(sy, 2 * (128 / bx)), s* = _size_pdfs_*; NOTE(review): an empty interval (sx == 0) gives bx == 0 and a division by zero here -- presumably callers never pass empty intervals; confirm
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size / block dimension), spelled as (size % b == 0 ? size / b : size / b + 1)
+    internal_pack_TN::pack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::SE: { // Branch for direction stencil::SE: derive the launch geometry from the cell interval `ci` and launch the pack_SE kernel.
+    float *RESTRICT _data_buffer = buffer; // buffer pointer, passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.yMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.zMin() >= -(number of ghost layers)
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the field entry at the interval's minimum corner, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // interval x extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // iteration extent in x
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // interval y extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // iteration extent in y
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // interval z extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // iteration extent in z
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // field stride along x, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // field stride along y, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // field stride along z, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // field stride along the f index, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, min(1024, by), min(64, min(sz, 256 / (bx * by)))) with bx = min(128, sx), by = min(sy, 2 * (128 / bx)), s* = _size_pdfs_*; NOTE(review): an empty interval (sx == 0) gives bx == 0 and a division by zero here -- presumably callers never pass empty intervals; confirm
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size / block dimension), spelled as (size % b == 0 ? size / b : size / b + 1)
+    internal_pack_SE::pack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::BE: { // Branch for direction stencil::BE: derive the launch geometry from the cell interval `ci` and launch the pack_BE kernel.
+    float *RESTRICT _data_buffer = buffer; // buffer pointer, passed as the kernel's first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.xMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.yMin() >= -(number of ghost layers)
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers())) // asserts ci.zMin() >= -(number of ghost layers)
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // address of the field entry at the interval's minimum corner, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // interval x extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // iteration extent in x
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0)) // interval y extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // iteration extent in y
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0)) // interval z extent must fit into the field including ghost layers
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // iteration extent in z
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // field stride along x, forwarded to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride()); // field stride along y, forwarded to the kernel
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride()); // field stride along z, forwarded to the kernel
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride())); // field stride along the f index, forwarded to the kernel
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block = (bx, min(1024, by), min(64, min(sz, 256 / (bx * by)))) with bx = min(128, sx), by = min(sy, 2 * (128 / bx)), s* = _size_pdfs_*; NOTE(review): an empty interval (sx == 0) gives bx == 0 and a division by zero here -- presumably callers never pass empty intervals; confirm
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid = per-axis ceil(size / block dimension), spelled as (size % b == 0 ? size / b : size / b + 1)
+    internal_pack_BE::pack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::E: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_E::pack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TE: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_TE::pack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NE: {
+    float *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT const _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_pack_NE::pack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+void PackInfoPdfSinglePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+  auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
+
+  CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false);
+  auto communciationDirection = stencil::inverseDir[dir];
+
+  switch (communciationDirection) {
+  case stencil::SW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_SW::unpack_SW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BW::unpack_BW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::W: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_W::unpack_W<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TW::unpack_TW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NW: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_NW::unpack_NW<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BS: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BS::unpack_BS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::S: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_S::unpack_S<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TS: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TS::unpack_TS<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::B: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_B::unpack_B<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::T: { // direction T: derive the launch parameters from ci and pdfs, then launch unpack_T
+    float *RESTRICT const _data_buffer = buffer; // buffer pointer, passed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's lower corner must be >= -nrOfGhostLayers() in x, y and z
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pdfs element at the minimum corner of ci, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the x size incl. ghost layers must be >= the x extent of ci (same check for y and z below)
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides as reported by pdfs, forwarded unchanged to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims: x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero if an extent of ci is 0 - presumably ci is never empty here; confirm against callers
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per dimension, size_i divided by the matching _block dimension, rounded up (the "% ... == 0 ? q : q + 1" pattern)
+    internal_unpack_T::unpack_T<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::BN: { // direction BN: derive the launch parameters from ci and pdfs, then launch unpack_BN
+    float *RESTRICT const _data_buffer = buffer; // buffer pointer, passed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's lower corner must be >= -nrOfGhostLayers() in x, y and z
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pdfs element at the minimum corner of ci, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the x size incl. ghost layers must be >= the x extent of ci (same check for y and z below)
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides as reported by pdfs, forwarded unchanged to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims: x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero if an extent of ci is 0 - presumably ci is never empty here; confirm against callers
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per dimension, size_i divided by the matching _block dimension, rounded up (the "% ... == 0 ? q : q + 1" pattern)
+    internal_unpack_BN::unpack_BN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::N: { // direction N: derive the launch parameters from ci and pdfs, then launch unpack_N
+    float *RESTRICT const _data_buffer = buffer; // buffer pointer, passed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's lower corner must be >= -nrOfGhostLayers() in x, y and z
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pdfs element at the minimum corner of ci, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the x size incl. ghost layers must be >= the x extent of ci (same check for y and z below)
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides as reported by pdfs, forwarded unchanged to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims: x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero if an extent of ci is 0 - presumably ci is never empty here; confirm against callers
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per dimension, size_i divided by the matching _block dimension, rounded up (the "% ... == 0 ? q : q + 1" pattern)
+    internal_unpack_N::unpack_N<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::TN: { // direction TN: derive the launch parameters from ci and pdfs, then launch unpack_TN
+    float *RESTRICT const _data_buffer = buffer; // buffer pointer, passed to the kernel as its first argument
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers())) // this and the next two asserts: ci's lower corner must be >= -nrOfGhostLayers() in x, y and z
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pdfs element at the minimum corner of ci, f index 0
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0)) // the x size incl. ghost layers must be >= the x extent of ci (same check for y and z below)
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0); // x extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0); // y extent of ci
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0); // z extent of ci
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride()); // x/y/z/f strides as reported by pdfs, forwarded unchanged to the kernel
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))); // block dims: x = min(128, size_0); y = min(1024, size_1, 2 * (128 / x)); z = min(64, size_2, 256 / (x * min(size_1, 2 * (128 / x)))). NOTE(review): divides by zero if an extent of ci is 0 - presumably ci is never empty here; confirm against callers
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1))); // grid dims: per dimension, size_i divided by the matching _block dimension, rounded up (the "% ... == 0 ? q : q + 1" pattern)
+    internal_unpack_TN::unpack_TN<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3); // launch on `stream` with 0 bytes of dynamic shared memory
+    break;
+  }
+
+  case stencil::SE: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_SE::unpack_SE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::BE: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_BE::unpack_BE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::E: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_E::unpack_E<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::TE: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_TE::unpack_TE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  case stencil::NE: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(pdfs->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(pdfs->nrOfGhostLayers()))
+    float *RESTRICT _data_pdfs = pdfs->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_pdfs_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_pdfs_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(pdfs->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_pdfs_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_pdfs_0 = int64_t(pdfs->xStride());
+    const int64_t _stride_pdfs_1 = int64_t(pdfs->yStride());
+    const int64_t _stride_pdfs_2 = int64_t(pdfs->zStride());
+    const int64_t _stride_pdfs_3 = int64_t(1 * int64_t(pdfs->fStride()));
+    dim3 _block(uint32_c(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)), uint32_c(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))), uint32_c(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))));
+    dim3 _grid(uint32_c(((_size_pdfs_0) % (((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) == 0 ? (int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)) : ((int64_t)(_size_pdfs_0) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))) + 1)), uint32_c(((_size_pdfs_1) % (((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) == 0 ? (int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))) : ((int64_t)(_size_pdfs_1) / (int64_t)(((1024 < ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))) ? 1024 : ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) + 1)), uint32_c(((_size_pdfs_2) % (((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))))) == 0 ? (int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 
128 : _size_pdfs_0))))))))) : ((int64_t)(_size_pdfs_2) / (int64_t)(((64 < ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))) ? 64 : ((_size_pdfs_2 < ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0))))))) ? _size_pdfs_2 : ((int64_t)(256) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0) * ((_size_pdfs_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))) ? _size_pdfs_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_pdfs_0) ? 128 : _size_pdfs_0)))))))))) + 1)));
+    internal_unpack_NE::unpack_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_pdfs, _size_pdfs_0, _size_pdfs_1, _size_pdfs_2, _stride_pdfs_0, _stride_pdfs_1, _stride_pdfs_2, _stride_pdfs_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+uint_t PackInfoPdfSinglePrecisionCUDA::size(stencil::Direction dir, IBlock *block) { // returns ghost-region cell count * values per cell * sizeof(float) for direction dir
+  auto pdfs = block->getData<gpu::GPUField<float>>(pdfsID);
+  // Cell interval of the ghost region in direction dir; the (1, false) arguments are presumably thickness and fullSlice -- TODO confirm against GPUField::getGhostRegion.
+  CellInterval ci;
+  pdfs->getGhostRegion(dir, ci, 1, false);
+  // Number of float values counted per cell; it is 0 for any direction not listed in the switch.
+  uint_t elementsPerCell = 0;
+  // W, S, B, T, N and E count 5 values per cell; the twelve two-letter directions count 1.
+  switch (dir) {
+  case stencil::SW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::W:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NW:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BS:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::S:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TS:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::B:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::T:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::BN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::N:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TN:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::SE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::BE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::E:
+    elementsPerCell = 5;
+    break;
+
+  case stencil::TE:
+    elementsPerCell = 1;
+    break;
+
+  case stencil::NE:
+    elementsPerCell = 1;
+    break;
+
+  default:
+    elementsPerCell = 0;
+  }
+  return ci.numCells() * elementsPerCell * sizeof(float); // a byte count, since the product is scaled by sizeof(float)
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h
new file mode 100644
index 00000000000..c6ee2782b9e
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoPdfSinglePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoPdfSinglePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+class PackInfoPdfSinglePrecisionCUDA // GPU pack info for the GPUField<float> stored under pdfsID
+    : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+  PackInfoPdfSinglePrecisionCUDA(BlockDataID pdfsID_) : pdfsID(pdfsID_) {} // pdfsID_: block data id used to look up the field on each block
+  virtual ~PackInfoPdfSinglePrecisionCUDA() {}
+  // pack, unpack and size are only declared here; their definitions live outside this header.
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+            gpuStream_t stream) override;
+  void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */,
+                        IBlock * /* receiver */,
+                        gpuStream_t /* stream */) override { // unsupported: unconditionally invokes WALBERLA_ABORT
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn off local "
+                   "communication in the Communication class")
+  }
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+              gpuStream_t stream) override;
+  uint_t size(stencil::Direction dir, IBlock *block) override; // byte size for direction dir
+
+private:
+  BlockDataID pdfsID; // id passed to IBlock::getData to fetch the field
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
index 6cbf3cb98dc..da91325e5e5 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecision.cpp
@@ -24,8 +24,6 @@
 #include "core/cell/CellInterval.h"
 #include "stencil/Directions.h"
 
-#include <cstddef>
-
 #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu
new file mode 100644
index 00000000000..e9bae41971b
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.cu
@@ -0,0 +1,243 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecDoublePrecisionCUDA.cu
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoVecDoublePrecisionCUDA.h"
+
+#define FUNC_PREFIX __global__
+// RESTRICT maps to the compiler's restrict qualifier; the NVCC, clang and GCC branches also push the diagnostic state and silence unused-variable diagnostics.
+#if defined(__NVCC__)
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif                    // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__) // NOTE(review): RESTRICT is left undefined in this branch when __CUDA__ is not defined
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode (same settings as the device-mode branch)
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__)
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE { // pack kernel: each thread copies the 3 field components of one cell into the buffer
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT _data_buffer, double *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) { // threads whose coordinates fall outside the given sizes do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell coordinates taken from the thread's position in the launch grid
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+    _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2]; // component 0; the buffer holds 3 consecutive values per cell, ordered by ctr_0, then ctr_1, then ctr_2
+    _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3]; // component 1: one _stride_field_3 step further in the field
+    _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2] = _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3]; // component 2: two _stride_field_3 steps further
+  }
+}
+} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE { // unpack kernel: each thread copies 3 consecutive buffer values into the 3 field components of one cell
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(double *RESTRICT const _data_buffer, double *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  if (blockDim.x * blockIdx.x + threadIdx.x < _size_field_0 && blockDim.y * blockIdx.y + threadIdx.y < _size_field_1 && blockDim.z * blockIdx.z + threadIdx.z < _size_field_2) { // threads whose coordinates fall outside the given sizes do nothing
+    const int64_t ctr_0 = blockDim.x * blockIdx.x + threadIdx.x; // cell coordinates taken from the thread's position in the launch grid
+    const int64_t ctr_1 = blockDim.y * blockIdx.y + threadIdx.y;
+    const int64_t ctr_2 = blockDim.z * blockIdx.z + threadIdx.z;
+    _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0]; // component 0; buffer index uses the same 3-values-per-cell layout as the pack kernel
+    _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 1]; // component 1: one _stride_field_3 step further in the field
+    _data_field[_stride_field_0 * ctr_0 + _stride_field_1 * ctr_1 + _stride_field_2 * ctr_2 + 2 * _stride_field_3] = _data_buffer[3 * _size_field_0 * _size_field_1 * ctr_2 + 3 * _size_field_0 * ctr_1 + 3 * ctr_0 + 2]; // component 2: two _stride_field_3 steps further
+  }
+}
+} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+void PackInfoVecDoublePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) { // launches the pack kernel on stream, copying the field cells selected for dir into byte_buffer
+  double *buffer = reinterpret_cast<double *>(byte_buffer);
+  // Look up the double-precision GPU field stored under fieldID on this block.
+  auto field = block->getData<gpu::GPUField<double>>(fieldID);
+  // Cell interval to pack; the (1, false) arguments are presumably thickness and fullSlice -- TODO confirm against GPUField::getSliceBeforeGhostLayer.
+  CellInterval ci;
+  field->getSliceBeforeGhostLayer(dir, ci, 1, false);
+  // Every listed direction shares the same kernel launch; any other value returns without launching anything.
+  switch (dir) {
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::W:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::S:
+  case stencil::TS:
+  case stencil::B:
+  case stencil::C:
+  case stencil::T:
+  case stencil::BN:
+  case stencil::N:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::E:
+  case stencil::TE:
+  case stencil::NE: {
+    double *RESTRICT _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+    double *RESTRICT const _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0); // pointer to component 0 of the interval's minimum corner
+    WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0); // kernel extents are the interval's x/y/z sizes
+    WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_field_0 = int64_t(field->xStride());
+    const int64_t _stride_field_1 = int64_t(field->yStride());
+    const int64_t _stride_field_2 = int64_t(field->zStride());
+    const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride())); // the kernel uses this as the step between the 3 components
+    dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))));
+    dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? 
_size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1)));
+    internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+// Copies the contents of `byte_buffer` into the ghost region that the GPU
+// field stored on `block` reports for `dir`; the copy kernel runs on `stream`.
+void PackInfoVecDoublePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  auto field = block->getData<gpu::GPUField<double>>(fieldID);
+  CellInterval ci;
+  field->getGhostRegion(dir, ci, 1, false);
+
+  // The kernel is selected by the direction inverse to `dir`; any direction
+  // not listed below leaves the field untouched.
+  switch (stencil::inverseDir[dir]) {
+  case stencil::SW: case stencil::BW: case stencil::W: case stencil::TW:
+  case stencil::NW: case stencil::BS: case stencil::S: case stencil::TS:
+  case stencil::B: case stencil::C: case stencil::T: case stencil::BN:
+  case stencil::N: case stencil::TN: case stencil::SE: case stencil::BE:
+  case stencil::E: case stencil::TE: case stencil::NE: {
+    double *RESTRICT const bufferPtr = reinterpret_cast<double *>(byte_buffer);
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+    double *RESTRICT fieldPtr = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t nx = int64_c(ci.xSize());
+    const int64_t ny = int64_c(ci.ySize());
+    const int64_t nz = int64_c(ci.zSize());
+    const int64_t strideX = int64_t(field->xStride());
+    const int64_t strideY = int64_t(field->yStride());
+    const int64_t strideZ = int64_t(field->zStride());
+    const int64_t strideF = int64_t(field->fStride());
+
+    // Thread-block extents: x is min(nx, 128); y is capped by ny, by
+    // 2 * (128 / bx) and by 1024; z by nz, by 256 / (bx * yFit) and by 64.
+    const int64_t bx = (128 < nx) ? 128 : nx;
+    const int64_t yCap = 2 * (int64_t(128) / bx);
+    const int64_t yFit = (ny < yCap) ? ny : yCap;
+    const int64_t by = (1024 < yFit) ? 1024 : yFit;
+    const int64_t zCap = int64_t(256) / (bx * yFit);
+    const int64_t zFit = (nz < zCap) ? nz : zCap;
+    const int64_t bz = (64 < zFit) ? 64 : zFit;
+    dim3 _block(uint32_c(bx), uint32_c(by), uint32_c(bz));
+
+    // Grid extents: number of blocks needed to cover each axis (rounded up).
+    dim3 _grid(uint32_c((nx % bx == 0) ? nx / bx : nx / bx + 1),
+               uint32_c((ny % by == 0) ? ny / by : ny / by + 1),
+               uint32_c((nz % bz == 0) ? nz / bz : nz / bz + 1));
+    internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(bufferPtr, fieldPtr, nx, ny, nz, strideX, strideY, strideZ, strideF);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+// Size in bytes of the communication buffer for direction `dir` on
+// `block`.
+//
+// The cell count is that of the interval which
+// `getGhostRegion(dir, ci, 1, false)` reports for the block's GPU field. Each
+// cell contributes three `double` values for the directions listed in the
+// switch below and nothing for any other direction.
+//
+// dir:   communication direction
+// block: block whose GPU field is looked up through `fieldID`
+// Returns the buffer size in bytes (0 for directions that are not listed).
+//
+uint_t PackInfoVecDoublePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+  // Does this pack info transfer anything for `dir`?
+  bool isCommunicated = false;
+  switch (dir) {
+  case stencil::SW: case stencil::BW: case stencil::W: case stencil::TW:
+  case stencil::NW: case stencil::BS: case stencil::S: case stencil::TS:
+  case stencil::B: case stencil::C: case stencil::T: case stencil::BN:
+  case stencil::N: case stencil::TN: case stencil::SE: case stencil::BE:
+  case stencil::E: case stencil::TE: case stencil::NE:
+    isCommunicated = true;
+    break;
+  default:
+    break;
+  }
+
+  auto field = block->getData<gpu::GPUField<double>>(fieldID);
+  CellInterval ci;
+  field->getGhostRegion(dir, ci, 1, false);
+
+  // Three values per cell when communicated, none otherwise.
+  const uint_t elementsPerCell = isCommunicated ? uint_t(3) : uint_t(0);
+
+  return ci.numCells() * elementsPerCell * sizeof(double);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h
new file mode 100644
index 00000000000..18884f6c9d4
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecDoublePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecDoublePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+
+class PackInfoVecDoublePrecisionCUDA : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+  // `fieldID_` is the block data ID used to fetch the GPU field from a block.
+  PackInfoVecDoublePrecisionCUDA(BlockDataID fieldID_) : fieldID(fieldID_) {}
+  virtual ~PackInfoVecDoublePrecisionCUDA() = default;
+  // Buffer packing, unpacking and sizing; defined out of line.
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+            gpuStream_t stream) override;
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+              gpuStream_t stream) override;
+  uint_t size(stencil::Direction dir, IBlock *block) override;
+  // Local communication is not implemented: calling this aborts.
+  void communicateLocal(stencil::Direction, const IBlock *, IBlock *,
+                        gpuStream_t) override {
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn of local "
+                   "communication in the Communication class")
+  }
+
+private:
+  BlockDataID fieldID; // ID of the communicated field's block data
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp
index 3ddeee01b69..c3b718b2d42 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecision.cpp
@@ -24,8 +24,6 @@
 #include "core/cell/CellInterval.h"
 #include "stencil/Directions.h"
 
-#include <cstddef>
-
 #if (defined WALBERLA_CXX_COMPILER_IS_GNU) || (defined WALBERLA_CXX_COMPILER_IS_CLANG)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wfloat-equal"
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu
new file mode 100644
index 00000000000..c38b9e669bb
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.cu
@@ -0,0 +1,243 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecSinglePrecisionCUDA.cu
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3, lbmpy_walberla/pystencils_walberla from waLBerla commit b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "domain_decomposition/IBlock.h"
+#include "stencil/Directions.h"
+
+#include "PackInfoVecSinglePrecisionCUDA.h"
+
+#define FUNC_PREFIX __global__ // qualifier put on the generated kernels below
+
+#if defined(__NVCC__) // nvcc: two spellings of the diagnostic pragmas
+#define RESTRICT __restrict__
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#pragma nv_diagnostic push
+#pragma nv_diag_suppress 177 // unused variable
+#else
+#pragma push
+#pragma diag_suppress 177 // unused variable
+#endif                    // defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+#elif defined(__clang__)
+#if defined(__CUDA__)
+#if defined(__CUDA_ARCH__)
+// clang compiling CUDA code in device mode
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#else
+// clang compiling CUDA code in host mode (same settings as device mode)
+#define RESTRICT __restrict__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif // defined(__CUDA_ARCH__)
+#endif // defined(__CUDA__); NOTE(review): RESTRICT stays undefined for clang without __CUDA__
+#elif defined(__GNUC__) or defined(__GNUG__)
+#define RESTRICT __restrict__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#elif defined(_MSC_VER)
+#define RESTRICT __restrict
+#else
+#define RESTRICT
+#endif // NOTE(review): the diagnostic pushes above have no matching pop in this part of the file
+
+namespace walberla {
+namespace pystencils {
+
+using walberla::cell::CellInterval;
+using walberla::stencil::Direction;
+
+namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
+// Each thread handles one cell (x, y, z): it reads the cell's three values from the strided field and stores them contiguously in the buffer.
+static FUNC_PREFIX __launch_bounds__(256) void pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT _data_buffer, float *RESTRICT const _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_field_0 || y >= _size_field_1 || z >= _size_field_2) return; // thread lies outside the given sizes
+  const int64_t fieldBase = _stride_field_0 * x + _stride_field_1 * y + _stride_field_2 * z;
+  const int64_t bufferBase = 3 * _size_field_0 * _size_field_1 * z + 3 * _size_field_0 * y + 3 * x;
+  for (int64_t f = 0; f < 3; ++f) _data_buffer[bufferBase + f] = _data_field[fieldBase + f * _stride_field_3];
+}
+} // namespace internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE {
+// Each thread handles one cell (x, y, z): it reads the cell's three contiguous values from the buffer and stores them into the strided field.
+static FUNC_PREFIX __launch_bounds__(256) void unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE(float *RESTRICT const _data_buffer, float *RESTRICT _data_field, int64_t const _size_field_0, int64_t const _size_field_1, int64_t const _size_field_2, int64_t const _stride_field_0, int64_t const _stride_field_1, int64_t const _stride_field_2, int64_t const _stride_field_3) {
+  const int64_t x = blockDim.x * blockIdx.x + threadIdx.x;
+  const int64_t y = blockDim.y * blockIdx.y + threadIdx.y;
+  const int64_t z = blockDim.z * blockIdx.z + threadIdx.z;
+  if (x >= _size_field_0 || y >= _size_field_1 || z >= _size_field_2) return; // thread lies outside the given sizes
+  const int64_t fieldBase = _stride_field_0 * x + _stride_field_1 * y + _stride_field_2 * z;
+  const int64_t bufferBase = 3 * _size_field_0 * _size_field_1 * z + 3 * _size_field_0 * y + 3 * x;
+  for (int64_t f = 0; f < 3; ++f) _data_field[fieldBase + f * _stride_field_3] = _data_buffer[bufferBase + f];
+}
+} // namespace internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE
+
+// Copies the cells that `getSliceBeforeGhostLayer` reports for `dir` from the
+// GPU field stored on `block` into `byte_buffer`; the kernel runs on `stream`.
+void PackInfoVecSinglePrecisionCUDA::pack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  auto field = block->getData<gpu::GPUField<float>>(fieldID);
+  CellInterval ci;
+  field->getSliceBeforeGhostLayer(dir, ci, 1, false);
+
+  // Directions not listed below are not packed.
+  switch (dir) {
+  case stencil::SW: case stencil::BW: case stencil::W: case stencil::TW:
+  case stencil::NW: case stencil::BS: case stencil::S: case stencil::TS:
+  case stencil::B: case stencil::C: case stencil::T: case stencil::BN:
+  case stencil::N: case stencil::TN: case stencil::SE: case stencil::BE:
+  case stencil::E: case stencil::TE: case stencil::NE: {
+    float *RESTRICT bufferPtr = reinterpret_cast<float *>(byte_buffer);
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+    float *RESTRICT const fieldPtr = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t nx = int64_c(ci.xSize());
+    const int64_t ny = int64_c(ci.ySize());
+    const int64_t nz = int64_c(ci.zSize());
+    const int64_t strideX = int64_t(field->xStride());
+    const int64_t strideY = int64_t(field->yStride());
+    const int64_t strideZ = int64_t(field->zStride());
+    const int64_t strideF = int64_t(field->fStride());
+
+    // Thread-block extents: x is min(nx, 128); y is capped by ny, by
+    // 2 * (128 / bx) and by 1024; z by nz, by 256 / (bx * yFit) and by 64.
+    const int64_t bx = (128 < nx) ? 128 : nx;
+    const int64_t yCap = 2 * (int64_t(128) / bx);
+    const int64_t yFit = (ny < yCap) ? ny : yCap;
+    const int64_t by = (1024 < yFit) ? 1024 : yFit;
+    const int64_t zCap = int64_t(256) / (bx * yFit);
+    const int64_t zFit = (nz < zCap) ? nz : zCap;
+    const int64_t bz = (64 < zFit) ? 64 : zFit;
+    dim3 _block(uint32_c(bx), uint32_c(by), uint32_c(bz));
+
+    // Grid extents: number of blocks needed to cover each axis (rounded up).
+    dim3 _grid(uint32_c((nx % bx == 0) ? nx / bx : nx / bx + 1),
+               uint32_c((ny % by == 0) ? ny / by : ny / by + 1),
+               uint32_c((nz % bz == 0) ? nz / bz : nz / bz + 1));
+    internal_pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::pack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(bufferPtr, fieldPtr, nx, ny, nz, strideX, strideY, strideZ, strideF);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+void PackInfoVecSinglePrecisionCUDA::unpack(Direction dir, unsigned char *byte_buffer, IBlock *block, gpuStream_t stream) {
+  float *buffer = reinterpret_cast<float *>(byte_buffer);
+
+  auto field = block->getData<gpu::GPUField<float>>(fieldID);
+
+  CellInterval ci;
+  field->getGhostRegion(dir, ci, 1, false);
+  auto communciationDirection = stencil::inverseDir[dir];
+
+  switch (communciationDirection) {
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::W:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::S:
+  case stencil::TS:
+  case stencil::B:
+  case stencil::C:
+  case stencil::T:
+  case stencil::BN:
+  case stencil::N:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::E:
+  case stencil::TE:
+  case stencil::NE: {
+    float *RESTRICT const _data_buffer = buffer;
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.xMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.yMin(), -int_c(field->nrOfGhostLayers()))
+    WALBERLA_ASSERT_GREATER_EQUAL(ci.zMin(), -int_c(field->nrOfGhostLayers()))
+    float *RESTRICT _data_field = field->dataAt(ci.xMin(), ci.yMin(), ci.zMin(), 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->xSizeWithGhostLayer(), int64_t(int64_c(ci.xSize()) + 0))
+    const int64_t _size_field_0 = int64_t(int64_c(ci.xSize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->ySizeWithGhostLayer(), int64_t(int64_c(ci.ySize()) + 0))
+    const int64_t _size_field_1 = int64_t(int64_c(ci.ySize()) + 0);
+    WALBERLA_ASSERT_GREATER_EQUAL(field->zSizeWithGhostLayer(), int64_t(int64_c(ci.zSize()) + 0))
+    const int64_t _size_field_2 = int64_t(int64_c(ci.zSize()) + 0);
+    const int64_t _stride_field_0 = int64_t(field->xStride());
+    const int64_t _stride_field_1 = int64_t(field->yStride());
+    const int64_t _stride_field_2 = int64_t(field->zStride());
+    const int64_t _stride_field_3 = int64_t(1 * int64_t(field->fStride()));
+    dim3 _block(uint32_c(((128 < _size_field_0) ? 128 : _size_field_0)), uint32_c(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))), uint32_c(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))));
+    dim3 _grid(uint32_c(((_size_field_0) % (((128 < _size_field_0) ? 128 : _size_field_0)) == 0 ? (int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)) : ((int64_t)(_size_field_0) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))) + 1)), uint32_c(((_size_field_1) % (((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) == 0 ? (int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))) : ((int64_t)(_size_field_1) / (int64_t)(((1024 < ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))) ? 1024 : ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) + 1)), uint32_c(((_size_field_2) % (((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? 
_size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) == 0 ? (int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 
128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))))) : ((int64_t)(_size_field_2) / (int64_t)(((64 < ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))) ? 64 : ((_size_field_2 < ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0))))))) ? _size_field_2 : ((int64_t)(256) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0) * ((_size_field_1 < 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))) ? _size_field_1 : 2 * ((int64_t)(128) / (int64_t)(((128 < _size_field_0) ? 128 : _size_field_0)))))))))) + 1)));
+    internal_unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE::unpack_SW_BW_W_TW_NW_BS_S_TS_B_C_T_BN_N_TN_SE_BE_E_TE_NE<<<_grid, _block, 0, stream>>>(_data_buffer, _data_field, _size_field_0, _size_field_1, _size_field_2, _stride_field_0, _stride_field_1, _stride_field_2, _stride_field_3);
+    break;
+  }
+
+  default:
+    return;
+  }
+}
+
+uint_t PackInfoVecSinglePrecisionCUDA::size(stencil::Direction dir, IBlock *block) {
+  auto field = block->getData<gpu::GPUField<float>>(fieldID);
+  // Result in bytes: cells in the ghost region of `dir` * values per cell * sizeof(float).
+  CellInterval ci;
+  field->getGhostRegion(dir, ci, 1, false);
+  // NOTE(review): the `1` is presumably the ghost layer thickness -- confirm.
+  uint_t elementsPerCell = 0;
+  // Each of the 19 directions listed below carries 3 float values per cell.
+  switch (dir) {
+  case stencil::SW:
+  case stencil::BW:
+  case stencil::W:
+  case stencil::TW:
+  case stencil::NW:
+  case stencil::BS:
+  case stencil::S:
+  case stencil::TS:
+  case stencil::B:
+  case stencil::C:
+  case stencil::T:
+  case stencil::BN:
+  case stencil::N:
+  case stencil::TN:
+  case stencil::SE:
+  case stencil::BE:
+  case stencil::E:
+  case stencil::TE:
+  case stencil::NE:
+    elementsPerCell = 3;
+    break;
+
+  default:
+    elementsPerCell = 0; // any other direction contributes zero bytes
+  }
+  return ci.numCells() * elementsPerCell * sizeof(float);
+}
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h
new file mode 100644
index 00000000000..c1eb6d2be71
--- /dev/null
+++ b/src/walberla_bridge/src/lattice_boltzmann/generated_kernels/PackInfoVecSinglePrecisionCUDA.h
@@ -0,0 +1,64 @@
+//======================================================================================================================
+//
+//  This file is part of waLBerla. waLBerla is free software: you can
+//  redistribute it and/or modify it under the terms of the GNU General Public
+//  License as published by the Free Software Foundation, either version 3 of
+//  the License, or (at your option) any later version.
+//
+//  waLBerla is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+//  for more details.
+//
+//  You should have received a copy of the GNU General Public License along
+//  with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \\file PackInfoVecSinglePrecisionCUDA.h
+//! \\author pystencils
+//======================================================================================================================
+
+// kernel generated with pystencils v1.3.3, lbmpy v1.3.3,
+// lbmpy_walberla/pystencils_walberla from waLBerla commit
+// b0842e1a493ce19ef1bbb8d2cf382fc343970a7f
+
+#pragma once
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+
+#include "domain_decomposition/IBlock.h"
+
+#include "stencil/Directions.h"
+
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/communication/GeneratedGPUPackInfo.h"
+
+namespace walberla {
+namespace pystencils {
+/// GPU pack info for the gpu::GPUField<float> stored under `fieldID`.
+class PackInfoVecSinglePrecisionCUDA
+    : public ::walberla::gpu::GeneratedGPUPackInfo {
+public:
+  PackInfoVecSinglePrecisionCUDA(BlockDataID fieldID_) : fieldID(fieldID_) {}
+  virtual ~PackInfoVecSinglePrecisionCUDA() {}
+
+  void pack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+            gpuStream_t stream) override;
+  void communicateLocal(stencil::Direction /*dir*/, const IBlock * /* sender */,
+                        IBlock * /* receiver */,
+                        gpuStream_t /* stream */) override {
+    WALBERLA_ABORT("Local Communication not implemented yet for standard "
+                   "PackInfos. To run your application turn off local "
+                   "communication in the Communication class")
+  } // not implemented: aborts if local communication is requested
+  void unpack(stencil::Direction dir, unsigned char *buffer, IBlock *block,
+              gpuStream_t stream) override;
+  uint_t size(stencil::Direction dir, IBlock *block) override;
+
+private:
+  BlockDataID fieldID; // used to look up the GPU field on a given block
+};
+
+} // namespace pystencils
+} // namespace walberla
diff --git a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh
index f9dc9ae83d2..a71202df382 100644
--- a/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh
+++ b/src/walberla_bridge/src/lattice_boltzmann/lb_kernels.cuh
@@ -29,6 +29,10 @@
 #include "generated_kernels/FieldAccessorsSinglePrecisionCUDA.cuh"
 #include "generated_kernels/InitialPDFsSetterDoublePrecisionCUDA.h"
 #include "generated_kernels/InitialPDFsSetterSinglePrecisionCUDA.h"
+#include "generated_kernels/PackInfoPdfDoublePrecisionCUDA.h"
+#include "generated_kernels/PackInfoPdfSinglePrecisionCUDA.h"
+#include "generated_kernels/PackInfoVecDoublePrecisionCUDA.h"
+#include "generated_kernels/PackInfoVecSinglePrecisionCUDA.h"
 #include "generated_kernels/StreamSweepDoublePrecisionCUDA.h"
 #include "generated_kernels/StreamSweepSinglePrecisionCUDA.h"
 
@@ -49,6 +53,8 @@ template <> struct KernelTrait<double, Arch::GPU> {
       pystencils::CollideSweepDoublePrecisionLeesEdwardsCUDA;
   using StreamSweep = pystencils::StreamSweepDoublePrecisionCUDA;
   using InitialPDFsSetter = pystencils::InitialPDFsSetterDoublePrecisionCUDA;
+  using PackInfoPdf = pystencils::PackInfoPdfDoublePrecisionCUDA;
+  using PackInfoVec = pystencils::PackInfoVecDoublePrecisionCUDA;
 };
 
 template <> struct KernelTrait<float, Arch::GPU> {
@@ -58,6 +64,8 @@ template <> struct KernelTrait<float, Arch::GPU> {
       pystencils::CollideSweepSinglePrecisionLeesEdwardsCUDA;
   using StreamSweep = pystencils::StreamSweepSinglePrecisionCUDA;
   using InitialPDFsSetter = pystencils::InitialPDFsSetterSinglePrecisionCUDA;
+  using PackInfoPdf = pystencils::PackInfoPdfSinglePrecisionCUDA;
+  using PackInfoVec = pystencils::PackInfoVecSinglePrecisionCUDA;
 };
 
 template <> struct BoundaryHandlingTrait<double, Arch::GPU> {
diff --git a/src/walberla_bridge/tests/CMakeLists.txt b/src/walberla_bridge/tests/CMakeLists.txt
index 83a7d9d2ee4..06342ac225a 100644
--- a/src/walberla_bridge/tests/CMakeLists.txt
+++ b/src/walberla_bridge/tests/CMakeLists.txt
@@ -24,9 +24,11 @@ function(ESPRESSO_ADD_TEST)
   espresso_unit_test(
     SRC ${TEST_SRC} NAME ${TEST_NAME} NUM_PROC ${TEST_NUM_PROC} DEPENDS
     ${TEST_DEPENDS} espresso::walberla espresso::utils)
+  if(WALBERLA_BUILD_WITH_CUDA)
+    target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla_cuda)
+  endif()
   if(${TEST_SRC} MATCHES ".*\.cu$")
-    target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags
-                                               espresso::walberla_cuda)
+    target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cuda_flags)
   else()
     target_link_libraries(${TEST_NAME} PRIVATE espresso::walberla::cpp_flags)
   endif()
diff --git a/testsuite/scripts/benchmarks/CMakeLists.txt b/testsuite/scripts/benchmarks/CMakeLists.txt
index 47583fdb579..76ecaa46127 100644
--- a/testsuite/scripts/benchmarks/CMakeLists.txt
+++ b/testsuite/scripts/benchmarks/CMakeLists.txt
@@ -43,7 +43,7 @@ add_custom_target(
 
 benchmark_test(FILE test_lj.py)
 benchmark_test(FILE test_lb.py SUFFIX cpu)
-# benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu") # TODO WALBERLA
+benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu")
 benchmark_test(FILE test_p3m.py SUFFIX cpu)
 benchmark_test(FILE test_p3m.py SUFFIX gpu LABELS "gpu")
 benchmark_test(FILE test_ferrofluid.py)

From 7598a3bea51feb47f2d1a694e3b983f8d3f70f44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jean-No=C3=ABl=20Grad?= <jgrad@icp.uni-stuttgart.de>
Date: Mon, 4 Nov 2024 21:01:19 +0100
Subject: [PATCH 2/2] Document LB GPU patches and benchmarks

---
 doc/sphinx/lb.rst                             | 33 +++++++++++++++++++
 .../walberla_kernels/generate_lb_kernels.py   |  8 ++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/doc/sphinx/lb.rst b/doc/sphinx/lb.rst
index 9f1f0189699..a1d4e7388e7 100644
--- a/doc/sphinx/lb.rst
+++ b/doc/sphinx/lb.rst
@@ -397,6 +397,39 @@ of the first LB GPU instance::
 
     system.cuda_init_handle.call_method("set_device_id_per_rank")
 
+Due to padding, the memory footprint of the GPU fields is not a linear function
+of the grid size. Instead, it is a step function of the size along the x-direction
+of the rank-local LB domain.
+For illustration, a local LB domain with dimensions 64x256x256 will take as
+much VRAM as a domain with size 127x256x256 in single-precision mode.
+As a rule of thumb, the VRAM in GiB per rank-local LB domain will be:
+
+.. math::
+
+   \label{eq:lj}
+     f(n_x, n_y, n_z) =
+       \begin{cases}
+         \left\lceil n_x / 64 \right\rceil \cdot 64 \cdot n_y \cdot n_z \cdot 204 / 1024^3
+         & \text{(in single-precision)}\\
+         \left\lceil n_x / 32 \right\rceil \cdot 32 \cdot n_y \cdot n_z \cdot 410 / 1024^3
+         & \text{(in double-precision)}
+       \end{cases}
+
+where :math:`n_x`, :math:`n_y`, :math:`n_z` are the dimensions of the rank-local LB domain in agrid units, including the ghost layer.
+
+Regarding communication between GPUs, for optimal performance the MPI topology
+should divide the z-direction first, the y-direction second, and the x-direction
+last, i.e. ascending order of the prime factors. Please note the default MPI
+Cartesian grid in |es| is sorted in descending order of the prime factors,
+and leads to poor performance. For illustration, a Cartesian grid with
+shape ``[1, 1, 8]`` yields 94% weak scaling efficiency,
+shape ``[8, 1, 1]`` yields 90%,
+shape ``[1, 2, 4]`` yields 88%,
+shape ``[4, 2, 1]`` yields 86%,
+shape ``[2, 2, 2]`` yields 81%.
+These figures assume 1 GPU per CPU. Using more than 1 CPU per GPU or more
+than 1 GPU per CPU can degrade weak scaling efficiency further.
+
 .. _Electrohydrodynamics:
 
 Electrohydrodynamics
diff --git a/maintainer/walberla_kernels/generate_lb_kernels.py b/maintainer/walberla_kernels/generate_lb_kernels.py
index bef9badc541..f9e93f16a1e 100644
--- a/maintainer/walberla_kernels/generate_lb_kernels.py
+++ b/maintainer/walberla_kernels/generate_lb_kernels.py
@@ -221,6 +221,7 @@ def patch_file(class_name, extension, target_suffix, patch):
 
     def patch_packinfo_header(content, target_suffix):
         if target_suffix in ["", "AVX"]:
+            # fix MPI buffer memory alignment
             token = "\n       //TODO: optimize by generating kernel for this case\n"
             assert token in content
             content = content.replace(token, "\n")
@@ -232,6 +233,7 @@ def patch_packinfo_header(content, target_suffix):
             assert token in content
             content = content.replace(token, f"{token[:-1]} + sizeof({ft}))")
         elif target_suffix in ["CUDA"]:
+            # replace preprocessor macros and pragmas
             token = "#define FUNC_PREFIX __global__"
             assert token in content
             content = content.replace(token, "")
@@ -240,16 +242,18 @@ def patch_packinfo_header(content, target_suffix):
 
     def patch_packinfo_kernel(content, target_suffix):
         if target_suffix in ["", "AVX"]:
-            # fix MPI buffer
+            # fix MPI buffer memory alignment
             m = re.search("(float|double) *\* *buffer = reinterpret_cast<(?:float|double) *\*>\(byte_buffer\);\n", content)  # nopep8
             assert m is not None
             content = content.replace(m.group(0), f"byte_buffer += sizeof({m.group(1)}) - (reinterpret_cast<std::size_t>(byte_buffer) - (reinterpret_cast<std::size_t>(byte_buffer) / sizeof({m.group(1)})) * sizeof({m.group(1)}));\n  {m.group(0)}")  # nopep8
         if target_suffix in ["CUDA"]:
+            # replace preprocessor macros and pragmas
             token = "#define FUNC_PREFIX __global__"
             assert token in content
             push, _ = custom_additional_extensions.generate_device_preprocessor(
                 "packinfo", defines=("RESTRICT",))
             content = content.replace(token, f"{token}\n{push}")
+            # add missing includes
             token = '#include "PackInfo'
             assert token in content
             content = content.replace(token, f'#include "core/DataTypes.h"\n#include "core/cell/CellInterval.h"\n#include "domain_decomposition/IBlock.h"\n#include "stencil/Directions.h"\n\n{token}')  # nopep8
@@ -276,10 +280,12 @@ def patch_packinfo_kernel(content, target_suffix):
 
     # pylint: disable=unused-argument
     def patch_boundary_header(content, target_suffix):
+        # replace real_t by actual floating-point type
         return content.replace("real_t", config.data_type.default_factory().c_name)  # nopep8
 
     def patch_boundary_kernel(content, target_suffix):
         if target_suffix in ["CUDA"]:
+            # replace preprocessor macros and pragmas
             push, pop = custom_additional_extensions.generate_device_preprocessor(
                 "ubb_boundary", defines=("RESTRICT",))
             content = re.sub(r"#ifdef __GNUC__[\s\S]+?#endif(?=\n\n|\n//)", "", content)  # nopep8