
Commit 433abb2

simplified to download the whole file

1 parent 2616af9

2 files changed (+7, -51 lines)


Diff for: README.md (+1, -33)

@@ -181,36 +181,4 @@ psql> select * from animals;
  parrot | 103
  tortoise | 205
 (4 rows)
-```
-
-### Configuration
-
-One can tune some parameters such as:
-* `aws_s3.buffer_chunk_size` (chunk size when reading data from S3). Default is 262,144 bytes.
-* `aws_s3.buffer_lines` (number of lines to be imported in the table at each time). Default is 1,000 lines.
-
-One can update these parameters at the session level or at database level.
-
-At the session level:
-```postgresql
-SET aws_s3.buffer_chunk_size = 1000000;
-SET aws_s3.buffer_num_lines = 1000;
-```
-
-At the database level:
-```postgresql
-ALTER DATABASE db SET aws_s3.buffer_chunk_size = 1000000;
-ALTER DATABASE db SET aws_s3.buffer_num_lines = 1000;
-
--- Then reopen postgres
-```
-
-Then to check the values of the settings:
-```postgresql
-SELECT current_setting('aws_s3.buffer_chunk_size', true) AS chunk_size, current_setting('aws_s3.buffer_num_lines', true) as num_lines;
-
- chunk_size | num_lines
-------------+-----------
- 1000000    | 1000
-(1 row)
-```
+```
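
An aside on the machinery this removes: in PL/Python, `current_setting(name, true)` returns NULL (`None`) for an unset GUC and otherwise returns the value as text, so the old `or`-based fallback yielded either an integer default or a string; the code defaults (131072 and 10000) also never matched the documented ones (262,144 and 1,000). The commit keeps the prepared lookup but casts to `::int` in SQL, as the next diff shows. Here is a tiny self-contained sketch of that fallback pattern; `resolve_setting` is a hypothetical helper, not from the repo:

```python
# Sketch of the old fallback pattern; resolve_setting is hypothetical.
# In PL/Python, plan.execute(['aws_s3.buffer_chunk_size'])[0]['current_setting']
# is None when the GUC is unset and a *string* when it is set.
def resolve_setting(raw_value, default):
    return raw_value or default  # exactly the old code's `... or 131072`

assert resolve_setting(None, 131072) == 131072           # unset: int default wins
assert resolve_setting('1000000', 131072) == '1000000'   # set: text comes back, not int

# The new prepared plan casts in SQL instead, so a set value arrives as an int:
#   plpy.prepare('select current_setting($1, true)::int', ['TEXT'])
```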

Diff for: aws_s3--0.0.1.sql (+6, -18)

@@ -66,9 +66,7 @@ AS $$
     boto3 = cache_import('boto3')
     tempfile = cache_import('tempfile')
 
-    plan = plpy.prepare('select current_setting($1, true)', ['TEXT'])
-    buffer_chunk_size = plan.execute(['aws_s3.buffer_chunk_size'])[0]['current_setting'] or 131072
-    buffer_num_lines = plan.execute(['aws_s3.buffer_num_lines'])[0]['current_setting'] or 10000
+    plan = plpy.prepare('select current_setting($1, true)::int', ['TEXT'])
 
     s3 = boto3.client(
         's3',
@@ -78,27 +76,17 @@ AS $$
         region_name=region
     )
 
-    def copy_rows(fd):
+    with tempfile.NamedTemporaryFile() as fd:
+        s3.download_fileobj(bucket, file_path, fd)
         fd.flush()
-        res = plpy.execute("COPY {table_name} {column_list} FROM '{filename}' {options};".format(
+        res = plpy.execute("COPY {table_name} {column_list} FROM {filename} {options};".format(
                 table_name=table_name,
+                filename=plpy.quote_literal(fd.name),
                 column_list=column_list,
-                filename=fd.name,
                 options=options
             )
         )
-        fd.seek(0)
-
-    lines = s3.get_object(Bucket=bucket, Key=file_path)['Body'].iter_lines(chunk_size=buffer_chunk_size)
-    with tempfile.NamedTemporaryFile() as fd:
-        for line_num, line in enumerate(lines):
-            if (line_num + 1) % buffer_num_lines == 0:
-                copy_rows(fd)
-
-            fd.write(line + '\n')
-
-        copy_rows(fd)
-    return line_num
+        return res.nrows()
 $$;
 
 --
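
Read end to end, the new function body is a straight line: download the entire object into a server-local temporary file, then hand that file to a server-side COPY. Below is a minimal runnable sketch assembled from the hunks above; `plpy` only exists inside PL/Python, so a stub stands in for it here, while `boto3.client('s3')` and `download_fileobj` are the real API calls and the bucket/credential values are placeholders.

```python
# Minimal sketch of the post-commit code path, assembled from the diff above.
import tempfile

import boto3  # assumed installed where the server's Python can import it


class _PlpyStub:
    """Stand-in for PL/Python's built-in plpy module, to make the flow readable and runnable."""

    @staticmethod
    def quote_literal(text):
        # same effect as SQL quote_literal: wrap in quotes, double embedded quotes
        return "'" + text.replace("'", "''") + "'"

    @staticmethod
    def execute(sql):
        print("server would run:", sql)

        class _Result:
            @staticmethod
            def nrows():
                return 0  # the real plpy result reports the COPY row count

        return _Result()


plpy = _PlpyStub()


def table_import_from_s3(table_name, column_list, options,
                         bucket, file_path, access_key, secret_key, region):
    # mirrors the post-commit function body from the diff above
    s3 = boto3.client(
        's3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name=region,
    )
    with tempfile.NamedTemporaryFile() as fd:
        # one call replaces the old get_object().iter_lines() buffering loop
        s3.download_fileobj(bucket, file_path, fd)
        fd.flush()
        # the temp-file path is spliced into COPY as a quoted SQL literal
        res = plpy.execute("COPY {table_name} {column_list} FROM {filename} {options};".format(
            table_name=table_name,
            filename=plpy.quote_literal(fd.name),
            column_list=column_list,
            options=options,
        ))
        return res.nrows()


# hypothetical invocation, placeholder values only:
# table_import_from_s3('animals', '', "(FORMAT csv)",
#                      'my-bucket', 'animals.csv', 'KEY', 'SECRET', 'us-east-1')
```

Two consequences of the simplification: the whole object now has to fit in the server's temporary storage, and the function returns `res.nrows()`, the row count reported by COPY, instead of the old `line_num`, which was merely the index of the last line read.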
