diff --git a/.buildkite/testing.yml b/.buildkite/testing.yml
index 24f7c54..cea25e4 100644
--- a/.buildkite/testing.yml
+++ b/.buildkite/testing.yml
@@ -1,7 +1,7 @@
 steps:
   - group: ":julia: CUDA GPU"
     steps:
-      - label: ":julia: Julia {{matrix.julia}} + CUDA GPU"
+      - label: ":julia: Julia {{matrix.julia}} + CUDA GPU (Backend Group: {{matrix.group}})"
         plugins:
           - JuliaCI/julia#v1:
               version: "{{matrix.julia}}"
@@ -16,13 +16,16 @@ steps:
           queue: "juliagpu"
           cuda: "*"
         env:
-          BACKEND_GROUP: "CUDA"
+          BACKEND_GROUP: "{{matrix.group}}"
         if: build.message !~ /\[skip tests\]/ && build.message !~ /\[skip ci\]/
         timeout_in_minutes: 60
         matrix:
           setup:
             julia:
               - "1"
+            group:
+              - CUDA
+              - XLA

   - group: ":telescope: Downstream CUDA"
     steps:
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 21a8b87..8e0ae6b 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -21,7 +21,7 @@ concurrency:

 jobs:
   ci:
-    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ github.event_name }}
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.group }} - ${{ github.event_name }}
     if: ${{ !contains(github.event.head_commit.message, '[skip tests]') }}
     runs-on: ${{ matrix.os }}
     strategy:
@@ -33,6 +33,12 @@ jobs:
           - ubuntu-latest
           - macos-latest
           - windows-latest
+        group:
+          - CPU
+          - XLA
+        exclude:
+          - os: windows-latest
+            group: XLA
     steps:
       - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v2
@@ -50,6 +56,8 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
+        env:
+          GROUP: ${{ matrix.group }}
       - uses: julia-actions/julia-processcoverage@v1
         with:
           directories: src,ext
@@ -171,6 +179,3 @@ jobs:
       - name: Check if the PR does increase number of invalidations
         if: steps.invs_pr.outputs.total > steps.invs_default.outputs.total
         run: exit 1
-
-env:
-  BACKEND_GROUP: "CPU"
diff --git a/ext/MLDataDevicesMLUtilsExt.jl b/ext/MLDataDevicesMLUtilsExt.jl
index 693e661..e544bc0 100644
--- a/ext/MLDataDevicesMLUtilsExt.jl
+++ b/ext/MLDataDevicesMLUtilsExt.jl
@@ -1,10 +1,10 @@
 module MLDataDevicesMLUtilsExt

 using MLDataDevices: MLDataDevices, AbstractDevice, CPUDevice, CUDADevice, AMDGPUDevice,
-                     MetalDevice, oneAPIDevice, DeviceIterator
+                     MetalDevice, oneAPIDevice, XLADevice, DeviceIterator
 using MLUtils: MLUtils, DataLoader

-for dev in (CPUDevice, CUDADevice, AMDGPUDevice, MetalDevice, oneAPIDevice)
+for dev in (CPUDevice, CUDADevice, AMDGPUDevice, MetalDevice, oneAPIDevice, XLADevice)
     @eval function (D::$(dev))(dataloader::DataLoader)
         if dataloader.parallel
             if dataloader.buffer
diff --git a/ext/MLDataDevicesReactantExt.jl b/ext/MLDataDevicesReactantExt.jl
index 90e9f4e..3abc8fc 100644
--- a/ext/MLDataDevicesReactantExt.jl
+++ b/ext/MLDataDevicesReactantExt.jl
@@ -2,7 +2,7 @@ module MLDataDevicesReactantExt

 using Adapt: Adapt
 using MLDataDevices: MLDataDevices, Internal, XLADevice, CPUDevice
-using Reactant: Reactant, RArray, ConcreteRArray
+using Reactant: Reactant, RArray

 MLDataDevices.loaded(::Union{XLADevice, Type{<:XLADevice}}) = true
 MLDataDevices.functional(::Union{XLADevice, Type{<:XLADevice}}) = true
@@ -21,6 +21,6 @@ Internal.get_device_type(::RArray) = XLADevice
 Internal.unsafe_free_internal!(::Type{XLADevice}, x::AbstractArray) = nothing

 # Device Transfer
-Adapt.adapt_storage(::XLADevice, x::AbstractArray) = ConcreteRArray(x)
+Adapt.adapt_storage(::XLADevice, x::AbstractArray) = Reactant.to_rarray(x)

 end
diff --git a/src/internal.jl b/src/internal.jl
index 5c09c15..e13b716 100644
--- a/src/internal.jl
+++ b/src/internal.jl
@@ -35,13 +35,15 @@ for T in (CPUDevice, CUDADevice{Nothing}, AMDGPUDevice{Nothing},
     @eval get_device_id(::$(T)) = nothing
 end

-struct DeviceSelectionException <: Exception end
+struct DeviceSelectionException <: Exception
+    dev::String
+end

-function Base.showerror(io::IO, ::DeviceSelectionException)
-    return print(io, "DeviceSelectionException(No functional GPU device found!!)")
+function Base.showerror(io::IO, d::DeviceSelectionException)
+    return print(io, "DeviceSelectionException: No functional $(d.dev) device found!")
 end

-function get_gpu_device(; force_gpu_usage::Bool)
+function get_gpu_device(; force::Bool)
     backend = load_preference(MLDataDevices, "gpu_backend", nothing)

     # If backend set with preferences, use it
@@ -88,7 +90,7 @@ function get_gpu_device(; force::Bool)
         end
     end

-    force_gpu_usage && throw(DeviceSelectionException())
+    force && throw(DeviceSelectionException("GPU"))
     @warn """No functional GPU backend found! Defaulting to CPU.

              1. If no GPU is available, nothing needs to be done.
@@ -147,7 +149,7 @@ for op in (:get_device, :get_device_type)
         end
     end

-    for T in (Number, AbstractRNG, Val, Symbol, String, Nothing)
+    for T in (Number, AbstractRNG, Val, Symbol, String, Nothing, AbstractRange)
         @eval $(op)(::$(T)) = $(op == :get_device ? nothing : Nothing)
     end
 end
diff --git a/src/public.jl b/src/public.jl
index 168e2cf..5f1cb86 100644
--- a/src/public.jl
+++ b/src/public.jl
@@ -66,7 +66,7 @@ supported_gpu_backends() = map(Internal.get_device_name, GPU_DEVICES)

 """
     gpu_device(device_id::Union{Nothing, Integer}=nothing;
-        force_gpu_usage::Bool=false) -> AbstractDevice()
+        force::Bool=false) -> AbstractDevice

 Selects GPU device based on the following criteria:

@@ -75,7 +75,7 @@ Selects GPU device based on the following criteria:
  2. Otherwise, an automatic selection algorithm is used. We go over possible device
     backends in the order specified by `supported_gpu_backends()` and select the first
     functional backend.
- 3. If no GPU device is functional and `force_gpu_usage` is `false`, then `cpu_device()` is
+ 3. If no GPU device is functional and `force` is `false`, then `cpu_device()` is
    invoked.
 4. If nothing works, an error is thrown.
@@ -102,17 +102,24 @@ Selects GPU device based on the following criteria:

 ## Keyword Arguments

-  - `force_gpu_usage::Bool`: If `true`, then an error is thrown if no functional GPU
+  - `force::Bool`: If `true`, then an error is thrown if no functional GPU
    device is found.
 """
-function gpu_device(device_id::Union{Nothing, <:Integer}=nothing;
-        force_gpu_usage::Bool=false)::AbstractDevice
+function gpu_device(device_id::Union{Nothing, <:Integer}=nothing; force::Bool=false,
+        force_gpu_usage::Union{Missing, Bool}=missing)::AbstractDevice
+    if force_gpu_usage !== missing
+        Base.depwarn(
+            "`force_gpu_usage` is deprecated and will be removed in v2. Use \
+            `force` instead.", :gpu_device)
+        force = force_gpu_usage
+    end
+
     device_id == 0 && throw(ArgumentError("`device_id` is 1-indexed."))

     if GPU_DEVICE[] !== nothing
         dev = GPU_DEVICE[]
         if device_id === nothing
-            force_gpu_usage &&
+            force &&
                 !(dev isa AbstractGPUDevice) &&
                 throw(Internal.DeviceSelectionException())
             return dev
@@ -122,7 +129,7 @@ function gpu_device(device_id::Union{Nothing, <:Integer}=nothing;
         end
     end

-    device_type = Internal.get_gpu_device(; force_gpu_usage)
+    device_type = Internal.get_gpu_device(; force)
     device = Internal.with_device(device_type, device_id)
     GPU_DEVICE[] = device

@@ -179,19 +186,25 @@ Return a `CPUDevice` object which can be used to transfer data to CPU.
 cpu_device() = CPUDevice()

 """
-    xla_device() -> XLADevice()
+    xla_device(; force::Bool=false) -> Union{XLADevice, CPUDevice}

-Return a `XLADevice` object.
+Return a `XLADevice` object if functional. Otherwise, throw an error if `force` is `true`.
+Falls back to `CPUDevice` if `force` is `false`.

 !!! danger

     This is an experimental feature and might change without deprecations
 """
-function xla_device()
-    @assert loaded(XLADevice)&&functional(XLADevice) "`XLADevice` is not loaded or not \
-                                                      functional. Load `Reactant.jl` \
-                                                      before calling this function."
-    return XLADevice()
+function xla_device(; force::Bool=false)
+    msg = "`XLADevice` is not loaded or not functional. Load `Reactant.jl` before calling \
+           this function. Defaulting to CPU."
+    if loaded(XLADevice)
+        functional(XLADevice) && return XLADevice()
+        msg = "`XLADevice` is loaded but not functional. Defaulting to CPU."
+    end
+    force && throw(Internal.DeviceSelectionException("XLA"))
+    @warn msg maxlog=1
+    return cpu_device()
 end

 """
diff --git a/test/amdgpu_tests.jl b/test/amdgpu_tests.jl
index a4cb8cf..67edff4 100644
--- a/test/amdgpu_tests.jl
+++ b/test/amdgpu_tests.jl
@@ -5,8 +5,7 @@ using ArrayInterface: parameterless_type
     @test !MLDataDevices.functional(AMDGPUDevice)
     @test cpu_device() isa CPUDevice
     @test gpu_device() isa CPUDevice
-    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-        force_gpu_usage=true)
+    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(; force=true)
     @test_throws Exception default_device_rng(AMDGPUDevice(nothing))
     @test_logs (:warn, "`AMDGPU.jl` hasn't been loaded. Ignoring the device setting.") MLDataDevices.set_device!(
         AMDGPUDevice, nothing, 1)
@@ -20,12 +19,12 @@ using AMDGPU
     if MLDataDevices.functional(AMDGPUDevice)
         @info "AMDGPU is functional"
         @test gpu_device() isa AMDGPUDevice
-        @test gpu_device(; force_gpu_usage=true) isa AMDGPUDevice
+        @test gpu_device(; force=true) isa AMDGPUDevice
     else
         @info "AMDGPU is NOT functional"
         @test gpu_device() isa CPUDevice
         @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-            force_gpu_usage=true)
+            force=true)
     end
     @test MLDataDevices.GPU_DEVICE[] !== nothing
 end
diff --git a/test/cuda_tests.jl b/test/cuda_tests.jl
index c6cf533..92c0a27 100644
--- a/test/cuda_tests.jl
+++ b/test/cuda_tests.jl
@@ -5,8 +5,7 @@ using ArrayInterface: parameterless_type
     @test !MLDataDevices.functional(CUDADevice)
     @test cpu_device() isa CPUDevice
     @test gpu_device() isa CPUDevice
-    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-        force_gpu_usage=true)
+    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(; force=true)
     @test_throws Exception default_device_rng(CUDADevice(nothing))
     @test_logs (:warn, "`CUDA.jl` hasn't been loaded. Ignoring the device setting.") MLDataDevices.set_device!(
         CUDADevice, nothing, 1)
@@ -20,12 +19,12 @@ using LuxCUDA
     if MLDataDevices.functional(CUDADevice)
         @info "LuxCUDA is functional"
         @test gpu_device() isa CUDADevice
-        @test gpu_device(; force_gpu_usage=true) isa CUDADevice
+        @test gpu_device(; force=true) isa CUDADevice
     else
         @info "LuxCUDA is NOT functional"
         @test gpu_device() isa CPUDevice
         @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-            force_gpu_usage=true)
+            force=true)
     end
     @test MLDataDevices.GPU_DEVICE[] !== nothing
 end
diff --git a/test/iterator_tests.jl b/test/iterator_tests.jl
index dbb4d7a..e6db36f 100644
--- a/test/iterator_tests.jl
+++ b/test/iterator_tests.jl
@@ -18,10 +18,18 @@ if BACKEND_GROUP == "oneapi" || BACKEND_GROUP == "all"
     using oneAPI
 end

-DEVICES = [CPUDevice, CUDADevice, AMDGPUDevice, MetalDevice, oneAPIDevice]
+if BACKEND_GROUP == "xla" || BACKEND_GROUP == "all"
+    using Reactant
+    if "gpu" in keys(Reactant.XLA.backends)
+        Reactant.set_default_backend("gpu")
+    end
+end
+
+DEVICES = [CPUDevice, CUDADevice, AMDGPUDevice, MetalDevice, oneAPIDevice, XLADevice]

 freed_if_can_be_freed(x) = freed_if_can_be_freed(get_device_type(x), x)
 freed_if_can_be_freed(::Type{CPUDevice}, x) = true
+freed_if_can_be_freed(::Type{XLADevice}, x) = true
 function freed_if_can_be_freed(::Type, x)
     try
         Array(x)
@@ -53,17 +61,20 @@ end

     @testset "DataLoader: parallel=$parallel" for parallel in (true, false)
         X = rand(Float64, 3, 33)
-        pre = DataLoader(dev(X); batchsize=13, shuffle=false)
-        post = DataLoader(X; batchsize=13, shuffle=false) |> dev
+        pre = DataLoader(dev(X); batchsize=13, shuffle=false, parallel)
+        post = DataLoader(X; batchsize=13, shuffle=false, parallel) |> dev

         for epoch in 1:2
             prev_pre, prev_post = nothing, nothing
             for (p, q) in zip(pre, post)
                 @test get_device_type(p) == dev_type
                 @test get_device_type(q) == dev_type
-                @test p ≈ q
+                # Ordering is not guaranteed in parallel
+                !parallel && @test p ≈ q

-                dev_type === CPUDevice && continue
+                if dev_type === CPUDevice || dev_type === XLADevice
+                    continue
+                end

                 prev_pre === nothing || @test !freed_if_can_be_freed(prev_pre)
                 prev_pre = p
@@ -74,8 +85,8 @@ end
         end

         Y = rand(Float64, 1, 33)
-        pre = DataLoader((; x=dev(X), y=dev(Y)); batchsize=13, shuffle=false)
-        post = DataLoader((; x=X, y=Y); batchsize=13, shuffle=false) |> dev
+        pre = DataLoader((; x=dev(X), y=dev(Y)); batchsize=13, shuffle=false, parallel)
+        post = DataLoader((; x=X, y=Y); batchsize=13, shuffle=false, parallel) |> dev

         for epoch in 1:2
             prev_pre, prev_post = nothing, nothing
@@ -84,10 +95,13 @@
                 @test get_device_type(p.x) == dev_type
                 @test get_device_type(p.y) == dev_type
                 @test get_device_type(q.x) == dev_type
                 @test get_device_type(q.y) == dev_type
-                @test p.x ≈ q.x
-                @test p.y ≈ q.y
+                # Ordering is not guaranteed in parallel
+                !parallel && @test p.x ≈ q.x
+                !parallel && @test p.y ≈ q.y

-                dev_type === CPUDevice && continue
+                if dev_type === CPUDevice || dev_type === XLADevice
+                    continue
+                end
                 if prev_pre !== nothing
                     @test !freed_if_can_be_freed(prev_pre.x)
diff --git a/test/metal_tests.jl b/test/metal_tests.jl
index a4dd887..789fa49 100644
--- a/test/metal_tests.jl
+++ b/test/metal_tests.jl
@@ -5,8 +5,7 @@ using ArrayInterface: parameterless_type
     @test !MLDataDevices.functional(MetalDevice)
     @test cpu_device() isa CPUDevice
     @test gpu_device() isa CPUDevice
-    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-        force_gpu_usage=true)
+    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(; force=true)
     @test_throws Exception default_device_rng(MetalDevice())
 end

@@ -18,12 +17,12 @@ using Metal
     if MLDataDevices.functional(MetalDevice)
         @info "Metal is functional"
         @test gpu_device() isa MetalDevice
-        @test gpu_device(; force_gpu_usage=true) isa MetalDevice
+        @test gpu_device(; force=true) isa MetalDevice
     else
         @info "Metal is NOT functional"
         @test gpu_device() isa MetalDevice
         @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-            force_gpu_usage=true)
+            force=true)
     end
     @test MLDataDevices.GPU_DEVICE[] !== nothing
 end
diff --git a/test/oneapi_tests.jl b/test/oneapi_tests.jl
index f046498..7731c43 100644
--- a/test/oneapi_tests.jl
+++ b/test/oneapi_tests.jl
@@ -5,8 +5,7 @@ using ArrayInterface: parameterless_type
     @test !MLDataDevices.functional(oneAPIDevice)
     @test cpu_device() isa CPUDevice
     @test gpu_device() isa CPUDevice
-    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-        force_gpu_usage=true)
+    @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(; force=true)
     @test_throws Exception default_device_rng(oneAPIDevice())
 end

@@ -18,12 +17,12 @@ using oneAPI
     if MLDataDevices.functional(oneAPIDevice)
         @info "oneAPI is functional"
         @test gpu_device() isa oneAPIDevice
-        @test gpu_device(; force_gpu_usage=true) isa oneAPIDevice
+        @test gpu_device(; force=true) isa oneAPIDevice
     else
         @info "oneAPI is NOT functional"
         @test gpu_device() isa oneAPIDevice
         @test_throws MLDataDevices.Internal.DeviceSelectionException gpu_device(;
-            force_gpu_usage=true)
+            force=true)
     end
     @test MLDataDevices.GPU_DEVICE[] !== nothing
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 65cc190..20555d4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -9,6 +9,7 @@ const EXTRA_PKGS = String[]
 (BACKEND_GROUP == "all" || BACKEND_GROUP == "amdgpu") && push!(EXTRA_PKGS, "AMDGPU")
 (BACKEND_GROUP == "all" || BACKEND_GROUP == "oneapi") && push!(EXTRA_PKGS, "oneAPI")
 (BACKEND_GROUP == "all" || BACKEND_GROUP == "metal") && push!(EXTRA_PKGS, "Metal")
+(BACKEND_GROUP == "all" || BACKEND_GROUP == "xla") && push!(EXTRA_PKGS, "Reactant")

 if !isempty(EXTRA_PKGS)
     @info "Installing Extra Packages for testing" EXTRA_PKGS=EXTRA_PKGS
diff --git a/test/xla_tests.jl b/test/xla_tests.jl
new file mode 100644
index 0000000..81ae929
--- /dev/null
+++ b/test/xla_tests.jl
@@ -0,0 +1,126 @@
+using MLDataDevices, Random, Test
+using ArrayInterface: parameterless_type
+
+@testset "CPU Fallback" begin
+    @test !MLDataDevices.functional(XLADevice)
+    @test cpu_device() isa CPUDevice
+    @test xla_device() isa CPUDevice
+    @test_throws MLDataDevices.Internal.DeviceSelectionException xla_device(; force=true)
+    @test_throws Exception default_device_rng(XLADevice())
+end
+
+using Reactant
+if "gpu" in keys(Reactant.XLA.backends)
+    Reactant.set_default_backend("gpu")
+end
+
+@testset "Loaded Trigger Package" begin
+    if MLDataDevices.functional(XLADevice)
+        @info "Reactant is functional"
+        @test xla_device() isa XLADevice
+        @test xla_device(; force=true) isa XLADevice
+    else
+        @info "Reactant is NOT functional"
+        @test xla_device() isa CPUDevice
+        @test_throws MLDataDevices.Internal.DeviceSelectionException xla_device(;
+            force=true)
+    end
+end
+
+using FillArrays, Zygote # Extensions
+
+@testset "Data Transfer" begin
+    ps = (a=(c=zeros(10, 1), d=1), b=ones(10, 1), e=:c,
+        d="string", mixed=[2.0f0, 3.0, ones(2, 3)], # mixed array types
+        range=1:10,
+        rng_default=Random.default_rng(), rng=MersenneTwister(),
+        one_elem=Zygote.OneElement(2.0f0, (2, 3), (1:3, 1:4)), farray=Fill(1.0f0, (2, 3)))
+
+    device = xla_device()
+    aType = MLDataDevices.functional(XLADevice) ? Reactant.ConcreteRArray : Array
+    rngType = Random.AbstractRNG
+
+    ps_xpu = ps |> device
+    @test get_device(ps_xpu) isa XLADevice
+    @test get_device_type(ps_xpu) <: XLADevice
+    @test ps_xpu.a.c isa aType
+    @test ps_xpu.b isa aType
+    @test ps_xpu.a.d == ps.a.d
+    @test ps_xpu.mixed isa Vector
+    @test ps_xpu.mixed[1] isa Float32
+    @test ps_xpu.mixed[2] isa Float64
+    @test ps_xpu.mixed[3] isa aType
+    @test ps_xpu.range isa AbstractRange
+    @test ps_xpu.e == ps.e
+    @test ps_xpu.d == ps.d
+    @test ps_xpu.rng_default isa rngType
+    @test ps_xpu.rng == ps.rng
+
+    if MLDataDevices.functional(XLADevice)
+        @test ps_xpu.one_elem isa Reactant.RArray
+        @test ps_xpu.farray isa Reactant.RArray
+    else
+        @test ps_xpu.one_elem isa Zygote.OneElement
+        @test ps_xpu.farray isa Fill
+    end
+
+    ps_cpu = ps_xpu |> cpu_device()
+    @test get_device(ps_cpu) isa CPUDevice
+    @test get_device_type(ps_cpu) <: CPUDevice
+    @test ps_cpu.a.c isa Array
+    @test ps_cpu.b isa Array
+    @test ps_cpu.a.c == ps.a.c
+    @test ps_cpu.b == ps.b
+    @test ps_cpu.a.d == ps.a.d
+    @test ps_cpu.mixed isa Vector
+    @test ps_cpu.mixed[1] isa Float32
+    @test ps_cpu.mixed[2] isa Float64
+    @test ps_cpu.mixed[3] isa Array
+    @test ps_cpu.range isa AbstractRange
+    @test ps_cpu.e == ps.e
+    @test ps_cpu.d == ps.d
+    @test ps_cpu.rng_default isa Random.TaskLocalRNG
+    @test ps_cpu.rng == ps.rng
+
+    if MLDataDevices.functional(XLADevice)
+        @test ps_cpu.one_elem isa Array
+        @test ps_cpu.farray isa Array
+    else
+        @test ps_cpu.one_elem isa Zygote.OneElement
+        @test ps_cpu.farray isa Fill
+    end
+
+    ps_mixed = (; a=rand(2), b=device(rand(2)))
+    @test_throws ArgumentError get_device(ps_mixed)
+    @test_throws ArgumentError get_device_type(ps_mixed)
+
+    @testset "get_device_type compile constant" begin
+        x = rand(10, 10) |> device
+        ps = (; weight=x, bias=x, d=(x, x))
+
+        return_val(x) = Val(get_device_type(x)) # If it is a compile time constant then type inference will work
+        @test @inferred(return_val(ps)) isa Val{parameterless_type(typeof(device))}
+
+        return_val2(x) = Val(get_device(x))
+        @test @inferred(return_val2(ps)) isa Val{get_device(x)}
+    end
+end
+
+@testset "Wrapped Arrays" begin
+    if MLDataDevices.functional(XLADevice)
+        x = rand(10, 10) |> XLADevice()
+        @test get_device(x) isa XLADevice
+        @test get_device_type(x) <: XLADevice
+        x_view = view(x, 1:5, 1:5)
+        @test get_device(x_view) isa XLADevice
+        @test get_device_type(x_view) <: XLADevice
+    end
+end
+
+@testset "setdevice!" begin
+    if MLDataDevices.functional(XLADevice)
+        @test_logs (:warn,
+            "Setting device for `XLADevice` hasn't been implemented yet. Ignoring the device setting.") MLDataDevices.set_device!(
+            XLADevice, nothing, 1)
+    end
+end
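
The keyword rename in the patch is backward compatible: `gpu_device` still accepts `force_gpu_usage`, but now emits a deprecation warning and forwards the value to `force`. A minimal usage sketch of the renamed keyword (assuming MLDataDevices.jl and a functional GPU backend such as CUDA.jl are installed; variable names are illustrative):

using MLDataDevices

# New spelling: throw a DeviceSelectionException if no functional GPU backend is found.
dev = gpu_device(; force=true)

# Old spelling still works, but emits a deprecation warning and is slated for removal in v2.
dev_old = gpu_device(; force_gpu_usage=true)

x_dev = dev(rand(Float32, 3, 4))  # move an array to the selected device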
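
`xla_device` now degrades gracefully instead of asserting: with Reactant.jl loaded and functional it returns an `XLADevice`; otherwise it warns and falls back to `cpu_device()`, unless `force=true`, in which case it throws `DeviceSelectionException("XLA")`. Array transfer goes through `Reactant.to_rarray` rather than the `ConcreteRArray` constructor. A sketch of the intended flow, assuming Reactant.jl is installed:

using MLDataDevices, Reactant

dev = xla_device()            # XLADevice() if Reactant is functional, otherwise CPUDevice() with a warning
# xla_device(; force=true)    # throws instead of falling back to the CPU

x = rand(Float32, 3, 4)
x_xla = dev(x)                # converted via Reactant.to_rarray when dev isa XLADevice
x_cpu = x_xla |> cpu_device() # back to a plain Array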
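
The MLUtils.jl extension now covers `XLADevice` as well, so a `DataLoader` can be piped to any supported device, and the updated iterator tests pass `parallel` through to the loader; as those tests note, batch ordering is not guaranteed when `parallel=true`. A short sketch, assuming MLUtils.jl is installed:

using MLDataDevices, MLUtils

dev = xla_device()  # or gpu_device() / cpu_device()
loader = DataLoader(rand(Float32, 3, 33); batchsize=13, shuffle=false, parallel=false) |> dev
for x in loader
    # each batch arrives already on `dev`
end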