Skip to content

Commit

Permalink
#3504 use async calls using a cuda stream
Browse files: browse the repository at this point in the history
  • Loading branch information
totaam committed Aug 21, 2022
1 parent 2a5e368 commit b9c24fa
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 9 deletions.
14 changes: 9 additions & 5 deletions xpra/client/gl/gl_window_backing_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is part of Xpra.
# Copyright (C) 2013 Serviware (Arthur Huillet, <[email protected]>)
# Copyright (C) 2012-2021 Antoine Martin <[email protected]>
# Copyright (C) 2012-2022 Antoine Martin <[email protected]>
# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
# later version. See the file COPYING for details.

Expand Down Expand Up @@ -1074,6 +1074,9 @@ def paint_nvjpeg(gl_context):

def paint_nvjpeg(self, gl_context, encoding, img_data, x : int, y : int, width : int, height : int, options, callbacks):
with self.assign_cuda_context(True):
from pycuda.driver import Stream # @UnresolvedImport
stream = Stream()
options["stream"] = stream
img = self.nvjpeg_decoder.decompress_with_device("RGB", img_data, options)
log("paint_nvjpeg(%s) img=%s, downloading buffer to pbo", gl_context, img)
#'pixels' is a cuda buffer:
Expand All @@ -1086,18 +1089,19 @@ def paint_nvjpeg(self, gl_context, encoding, img_data, x : int, y : int, width :
glBufferData(GL_PIXEL_UNPACK_BUFFER, size, None, GL_STREAM_DRAW)
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0)
#pylint: disable=import-outside-toplevel
from pycuda.driver import memcpy_dtod #pylint: disable=no-name-in-module
from pycuda.driver import memcpy_dtod_async #pylint: disable=no-name-in-module
from pycuda.gl import RegisteredBuffer, graphics_map_flags # @UnresolvedImport
cuda_pbo = RegisteredBuffer(int(pbo), graphics_map_flags.WRITE_DISCARD)
log("RegisteredBuffer%s=%s", (pbo, graphics_map_flags.WRITE_DISCARD), cuda_pbo)
mapping = cuda_pbo.map()
mapping = cuda_pbo.map(stream)
ptr, msize = mapping.device_ptr_and_size()
assert msize>=size, "registered buffer size %i too small for pbo size %i" % (msize, size)
log("copying %i bytes from %s to mapping=%s at %#x", size, cuda_buffer, mapping, ptr)
memcpy_dtod(ptr, cuda_buffer, size)
mapping.unmap()
memcpy_dtod_async(ptr, cuda_buffer, size, stream)
mapping.unmap(stream)
cuda_pbo.unregister()
cuda_buffer.free()
stream.synchronize()

rgb_format = img.get_pixel_format()
assert rgb_format in ("RGB", "BGR", "RGBA", "BGRA"), "unexpected rgb format %r" % (rgb_format,)
Expand Down
4 changes: 2 additions & 2 deletions xpra/codecs/cuda_common/cuda_context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
# This file is part of Xpra.
# Copyright (C) 2013-2019 Antoine Martin <[email protected]>
# Copyright (C) 2013-2022 Antoine Martin <[email protected]>
# Xpra is released under the terms of the GNU GPL v2, or, at your option, any
# later version. See the file COPYING for details.

Expand Down Expand Up @@ -461,7 +461,7 @@ def make_context(self):
start = monotonic()
cf = driver.ctx_flags
if self.opengl:
from pycuda import gl
from pycuda import gl # @UnresolvedImport
self.context = gl.make_context(self.device)
else:
self.context = self.device.make_context(flags=cf.SCHED_YIELD | cf.MAP_HOST)
Expand Down
7 changes: 5 additions & 2 deletions xpra/codecs/nvjpeg/decoder.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,10 @@ def decompress_with_device(rgb_format, img_data, options=None):
else:
raise ValueError("invalid rgb format %r" % rgb_format)
cdef nvjpegImage_t nv_image
stream = (options or {}).get("stream", None)
cdef cudaStream_t nv_stream = NULL
if stream:
nv_stream = <cudaStream_t> (<uintptr_t> stream.handle)
cdef nvjpegStatus_t r
cdef uintptr_t dmem = 0
cdef int rowstride = 0, width = 0, height = 0
Expand Down Expand Up @@ -200,7 +203,7 @@ def decompress_with_device(rgb_format, img_data, options=None):
memcpy.set_src_device(rgb)
memcpy.set_dst_device(rgba)
memcpy.height = width*height
memcpy(aligned=False)
memcpy(stream)
rgb.free()
#fill in the alpha channel:
memcpy = Memcpy2D()
Expand All @@ -212,7 +215,7 @@ def decompress_with_device(rgb_format, img_data, options=None):
memcpy.set_src_device(alpha)
memcpy.set_dst_device(rgba)
memcpy.height = alpha_size
memcpy(aligned=False)
memcpy(stream)
alpha.free()
end = monotonic()
log("alpha merge took %ims", 1000*(end-start))
Expand Down

0 comments on commit b9c24fa

Please sign in to comment.