Skip to content

Commit

Permalink
Working implementation of GCS stream download
Browse files Browse the repository at this point in the history
  • Loading branch information
ivgiuliani committed Apr 12, 2023
1 parent 4649dea commit dbe4ff2
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions lib/bucket_store/gcs.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ module BucketStore
class Gcs
DEFAULT_TIMEOUT_SECONDS = 30

# Chunk size, in bytes, used by `stream_download` when the caller does not pass one.
DEFAULT_STREAM_CHUNK_SIZE_BYTES = 1024 * 1024 * 4 # 4 MiB

# Factory for the GCS adapter: returns `Gcs.new(timeout_seconds)`.
#
# @param timeout_seconds [Integer] forwarded unchanged to `Gcs.new`; defaults to
#   DEFAULT_TIMEOUT_SECONDS (30)
# @return [Gcs]
def self.build(timeout_seconds = DEFAULT_TIMEOUT_SECONDS)
Gcs.new(timeout_seconds)
end
Expand Down Expand Up @@ -56,6 +58,41 @@ def download(bucket:, key:)
}
end

# Streams the content of an object as a lazy sequence of chunks.
#
# There is no direct support for streaming downloads, so the stream is simulated with
# consecutive partial (ranged) downloads. Each download is only issued when the
# enumerator is advanced; the object size is read once, up front, when this method
# is called.
#
# @param bucket name of the bucket, passed to `get_bucket`
# @param key key of the object inside the bucket
# @param chunk_size [Integer, nil] maximum number of bytes per chunk; falls back to
#   DEFAULT_STREAM_CHUNK_SIZE_BYTES when nil. A non-positive value yields nothing.
# @return [Enumerator] yields `[metadata, body]` pairs, where `metadata` is a frozen
#   hash with the `:bucket` and `:key` given here and `body` is the chunk content
#
# NOTE(review): if `file(key)` returns nil for a missing key, `file.size` below raises
# NoMethodError — confirm whether a more specific error is wanted.
def stream_download(bucket:, key:, chunk_size: nil)
  chunk_size ||= DEFAULT_STREAM_CHUNK_SIZE_BYTES

  file = get_bucket(bucket).file(key)
  metadata = {
    bucket: bucket,
    key: key,
  }.freeze

  obj_size = file.size

  Enumerator.new do |yielder|
    start = 0
    while start < obj_size
      # The requested range is inclusive on both ends, so a chunk of `chunk_size`
      # bytes ends at `start + chunk_size - 1`, clamped to the object's last byte.
      stop = [start + chunk_size, obj_size].min - 1
      # Only reachable with a non-positive chunk_size: nothing sensible to request.
      break if stop < start

      # The returned object is a StringIO, so we must `.rewind` before we can
      # access it.
      obj_io = file.download(range: start..stop)
      obj_io&.rewind

      body = obj_io&.read
      # Stop instead of spinning forever if the backend returns no data.
      break if body.nil? || body.empty?

      yielder.yield([metadata, body])
      # Advance by what was actually received rather than what was requested.
      start += body.size
    end
  end
end

def list(bucket:, key:, page_size:)
Enumerator.new do |yielder|
token = nil
Expand Down

0 comments on commit dbe4ff2

Please sign in to comment.