Feat: change environment variables to Click/JSON arguments #146

Open · wants to merge 2 commits into base: dev
ack/readers/amazon_s3/cli.py (4 additions, 0 deletions)

@@ -17,6 +17,7 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

 import click
+
 from ack.readers.amazon_s3.reader import AmazonS3Reader
 from ack.utils.args import extract_args
 from ack.utils.processor import processor
@@ -26,6 +27,9 @@
 @click.option("--s3-bucket", required=True)
 @click.option("--s3-prefix", required=True, multiple=True)
 @click.option("--s3-format", required=True, type=click.Choice(["csv", "gz", "njson"]))
+@click.option("--s3-region-name", required=True)
+@click.option("--s3-access-key-id", required=True)
+@click.option("--s3-secret-access-key", required=True)
 @click.option("--s3-dest-key-split", default=-1, type=int)
 @click.option("--s3-csv-delimiter", default=",")
 @click.option("--s3-csv-fieldnames", default=None)
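Because each Click option carries the connector prefix, extract_args("s3_", kwargs) presumably strips that prefix before the kwargs reach the reader constructor, so --s3-region-name arrives as region_name. A minimal sketch of that behavior (an assumption about ack.utils.args, not its actual source):

# Hypothetical re-implementation of extract_args, shown only to illustrate
# how "--s3-region-name" becomes the reader's region_name keyword argument.
def extract_args(prefix: str, kwargs: dict) -> dict:
    return {key[len(prefix):]: value for key, value in kwargs.items() if key.startswith(prefix)}

args = extract_args("s3_", {"s3_bucket": "daily_reports", "s3_region_name": "eu-west-1"})
assert args == {"bucket": "daily_reports", "region_name": "eu-west-1"}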
ack/readers/amazon_s3/config.py (3 additions, 0 deletions)

@@ -13,3 +13,6 @@ class AmazonS3ReaderConfig(BaseModel):
     dest_key_split: int = 1
     csv_delimiter: str = ","
     csv_fieldnames: str = None
+    region_name: str
+    access_key_id: str
+    secret_access_key: str
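With these fields on the Pydantic model, the same credentials can also travel through the JSON route. A minimal sketch with placeholder values, assuming bucket/prefix/format are declared in the unchanged part of the model (as they are in the GCS config below):

from ack.readers.amazon_s3.config import AmazonS3ReaderConfig

# Illustrative payload only; bucket/prefix/format are assumed to sit above
# the fold of this diff, mirroring GoogleCloudStorageReaderConfig.
payload = {
    "bucket": "daily_reports",
    "prefix": ["FR/offline_sales/"],
    "format": "csv",
    "region_name": "eu-west-1",
    "access_key_id": "<AWS access key ID>",
    "secret_access_key": "<AWS secret access key>",
}
config = AmazonS3ReaderConfig(**payload)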
ack/readers/amazon_s3/reader.py (8 additions, 5 deletions)

@@ -21,14 +21,17 @@


 class AmazonS3Reader(ObjectStorageReader):
-    def __init__(self, bucket, prefix, format, dest_key_split=-1, **kwargs):
+    def __init__(self, bucket, prefix, format, region_name, access_key_id, secret_access_key, dest_key_split=-1, **kwargs):
+        self._access_key_id = access_key_id
+        self._secret_access_key = secret_access_key
+        self._region_name = region_name
         super().__init__(bucket, prefix, format, dest_key_split, platform="S3", **kwargs)

-    def create_client(self, config):
+    def create_client(self):
         boto_config = {
-            "region_name": config.REGION_NAME,
-            "aws_access_key_id": config.AWS_ACCESS_KEY_ID,
-            "aws_secret_access_key": config.AWS_SECRET_ACCESS_KEY,
+            "region_name": self._region_name,
+            "aws_access_key_id": self._access_key_id,
+            "aws_secret_access_key": self._secret_access_key,
         }
         return boto3.resource("s3", **boto_config)

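Note the ordering: the credentials are stored on the instance before super().__init__() runs, because the base class constructor calls create_client(). A hedged usage sketch with placeholder values:

from ack.readers.amazon_s3.reader import AmazonS3Reader

# Placeholder values; credentials are now constructor arguments instead of
# the REGION_NAME / AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY env vars.
reader = AmazonS3Reader(
    bucket="daily_reports",
    prefix=["FR/offline_sales/"],
    format="csv",
    region_name="eu-west-1",
    access_key_id="<AWS access key ID>",
    secret_access_key="<AWS secret access key>",
)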
ack/readers/google_cloud_storage/cli.py (2 additions, 0 deletions)

@@ -17,6 +17,7 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

 import click
+
 from ack.readers.google_cloud_storage.reader import GoogleCloudStorageReader
 from ack.utils.args import extract_args
 from ack.utils.processor import processor
@@ -29,6 +30,7 @@
 @click.option("--gcs-dest-key-split", default=-1, type=int)
 @click.option("--gcs-csv-delimiter", default=",")
 @click.option("--gcs-csv-fieldnames", default=None)
+@click.option("--gcs-project-id", required=True)
 @processor()
 def google_cloud_storage(**kwargs):
     return GoogleCloudStorageReader(**extract_args("gcs_", kwargs))
ack/readers/google_cloud_storage/config.py (1 addition, 0 deletions)

@@ -10,6 +10,7 @@ class GoogleCloudStorageReaderConfig(BaseModel):
     bucket: str
     prefix: List[str]
    format: Literal[FORMATS]
+    project_id: str
     dest_key_split: int = -1
     csv_delimiter: str = ","
     fieldnames: str = None
ack/readers/google_cloud_storage/reader.py (4 additions, 3 deletions)

@@ -24,11 +24,12 @@


 class GoogleCloudStorageReader(ObjectStorageReader, GoogleClient):
-    def __init__(self, bucket, prefix, format, dest_key_split=-1, **kwargs):
+    def __init__(self, bucket, prefix, format, project_id, dest_key_split=-1, **kwargs):
+        self._project_id = project_id
         super().__init__(bucket, prefix, format, dest_key_split, platform="GCS", **kwargs)

-    def create_client(self, config):
-        return storage.Client(credentials=self._get_credentials(), project=config.project_id)
+    def create_client(self):
+        return storage.Client(credentials=self._get_credentials(), project=self._project_id)

     def create_bucket(self, client, bucket):
         return client.bucket(bucket)
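Same pattern as the S3 reader: project_id is stashed on the instance before the base class builds the client. A sketch with placeholder values:

from ack.readers.google_cloud_storage.reader import GoogleCloudStorageReader

# Placeholder values; project_id is now an explicit argument rather than a
# value read from a module-level config object. Constructing the reader still
# requires valid Service Account credentials (resolved by _get_credentials()).
reader = GoogleCloudStorageReader(
    bucket="daily_reports",
    prefix=["FR/offline_sales/"],
    format="csv",
    project_id="<GCP project ID>",
)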
ack/readers/object_storage/reader.py (2 additions, 3 deletions)

@@ -18,7 +18,6 @@

 import tempfile

-from ack import config
 from ack.config import logger
 from ack.readers.reader import Reader
 from ack.streams.json_stream import JSONStream
@@ -27,7 +26,7 @@

 class ObjectStorageReader(Reader):
     def __init__(self, bucket, prefix, file_format, dest_key_split, platform=None, **kwargs):
-        self._client = self.create_client(config)
+        self._client = self.create_client()
         self._bucket = self.create_bucket(self._client, bucket)
         self._prefix_list = prefix
         self._platform = platform
@@ -70,7 +69,7 @@ def _result_generator(self, _object):
     def is_compatible_object(self, _object):
         return self.get_key(_object).endswith("." + self._format)

-    def create_client(self, config):
+    def create_client(self):
         raise NotImplementedError

     def create_bucket(self, client, bucket):
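Dropping the config parameter changes the contract for every subclass: create_client() now takes no arguments, so a subclass must store whatever its client needs on self before calling super().__init__(), which invokes create_client(). A minimal sketch of that contract; DummyReader and its stand-in client objects are hypothetical, not part of ack:

from ack.readers.object_storage.reader import ObjectStorageReader

class DummyReader(ObjectStorageReader):
    def __init__(self, bucket, prefix, format, token, **kwargs):
        # Store credentials first: super().__init__() calls create_client().
        self._token = token
        super().__init__(bucket, prefix, format, dest_key_split=-1, platform="DUMMY", **kwargs)

    def create_client(self):
        # A real connector would build a boto3 / google-cloud client here.
        return {"token": self._token}  # stand-in client object

    def create_bucket(self, client, bucket):
        return {"client": client, "name": bucket}  # stand-in bucket object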
ack/writers/google_bigquery/cli.py (1 addition, 0 deletions)

@@ -24,6 +24,7 @@

 @click.command(name="write_bq")
 @click.option("--bq-dataset", required=True)
+@click.option("--bq-project-id", required=True)
 @click.option("--bq-table", required=True)
 @click.option("--bq-bucket", required=True)
 @click.option("--bq-partition-column")
ack/writers/google_bigquery/config.py (1 addition, 0 deletions)

@@ -9,6 +9,7 @@

 class GoogleBigQueryWriterConfig(BaseModel):
     dataset: str
+    project_id: str
     table: str
     bucket: str
     partition_column: str = None
ack/writers/google_bigquery/writer.py (2 additions, 3 deletions)

@@ -17,7 +17,6 @@
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

 from google.cloud import bigquery
-from ack import config
 from ack.config import logger
 from ack.clients.google.client import GoogleClient
 from ack.streams.normalized_json_stream import NormalizedJSONStream
@@ -30,10 +29,10 @@ class GoogleBigQueryWriter(Writer, GoogleClient):
     _client = None

     def __init__(
-        self, dataset, table, bucket, partition_column, write_disposition, location, keep_files,
+        self, dataset, project_id, table, bucket, partition_column, write_disposition, location, keep_files,
     ):

-        self._project_id = config.PROJECT_ID
+        self._project_id = project_id
         self._client = bigquery.Client(credentials=self._get_credentials(), project=self._project_id)
         self._dataset = dataset
         self._table = table
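With project_id as a constructor argument, the writer no longer reads config.PROJECT_ID. A hedged instantiation sketch following the signature above; all values are placeholders:

from ack.writers.google_bigquery.writer import GoogleBigQueryWriter

# Placeholder values only; constructing the writer immediately creates a
# bigquery.Client for the given project, so valid credentials are required.
writer = GoogleBigQueryWriter(
    dataset="<BigQuery dataset>",
    project_id="<GCP project ID>",
    table="<BigQuery table>",
    bucket="<staging bucket>",
    partition_column=None,
    write_disposition="TRUNCATE",
    location="EU",
    keep_files=False,
)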
ack/writers/google_cloud_storage/cli.py (1 addition, 1 deletion)

@@ -25,7 +25,7 @@
 @click.command(name="write_gcs")
 @click.option("--gcs-bucket", help="GCS Bucket", required=True)
 @click.option("--gcs-prefix", help="GCS path to write the file.")
-@click.option("--gcs-project-id", help="GCS Project Id")
+@click.option("--gcs-project-id", help="GCS Project Id", required=True)
 @click.option(
     "--gcs-filename", help="Override the default name of the file (don't add the extension)",
 )
ack/writers/google_cloud_storage/writer.py (1 addition, 14 deletions)

@@ -16,16 +16,14 @@
 # along with this program; if not, write to the Free Software Foundation,
 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

-import click
 from google.cloud import storage
-from ack import config
 from ack.clients.google.client import GoogleClient
 from ack.writers.object_storage.writer import ObjectStorageWriter


 class GoogleCloudStorageWriter(ObjectStorageWriter, GoogleClient):
     def __init__(self, bucket, project_id, prefix=None, filename=None, **kwargs):
-        self._project_id = self.get_project_id(project_id)
+        self._project_id = project_id
         super().__init__(bucket, prefix, filename, platform="GCS", **kwargs)

     def _create_client(self):
@@ -43,14 +41,3 @@ def _create_blob(self, file_name, stream):

     def _get_uri(self, file_name):
         return f"gs{self._get_file_path(file_name)}"
-
-    @staticmethod
-    def get_project_id(project_id):
-        if project_id is None:
-            try:
-                return config.PROJECT_ID
-            except Exception:
-                raise click.exceptions.MissingParameter(
-                    "Please provide a project id in ENV var or params.", param_type="--gcs-project-id",
-                )
-        return project_id
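Removing get_project_id() deletes the silent fallback to config.PROJECT_ID, which is why --gcs-project-id becomes required in the CLI change above: a missing project id now fails at argument parsing instead of deep inside the writer. A sketch with placeholder values:

from ack.writers.google_cloud_storage.writer import GoogleCloudStorageWriter

# Placeholder values; project_id must now always be provided explicitly.
writer = GoogleCloudStorageWriter(
    bucket="<destination bucket>",
    project_id="<GCP project ID>",
    prefix="<destination prefix>",
)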
docs/source/readers.rst (6 additions, 11 deletions)

@@ -158,15 +158,7 @@ Source API
 Quickstart
 ----------

-Execute the following commands to set your credentials:
-
-.. code-block:: shell
-
-    export REGION_NAME=<S3 bucket region>
-    export AWS_ACCESS_KEY_ID=<S3 access key ID>
-    export AWS_SECRET_ACCESS_KEY=<S3 access key secret>
-
-Once done, launch your S3 reader command. The following command retrieves the blobs located under the Amazon S3 bucket ``daily_reports`` and the blob prefix ``FR/offline_sales/``.
+The following command retrieves the blobs located under the Amazon S3 bucket ``daily_reports`` and the blob prefix ``FR/offline_sales/``.

 .. code-block:: shell

@@ -192,6 +184,9 @@ CMD Options JSON Options Definition
 ``--s3-bucket``              ``bucket``              S3 bucket name
 ``--s3-prefix``              ``prefix`` (list)       S3 blob prefix. Several prefixes can be provided in a single command.
 ``--s3-format``              ``format``              S3 blob format. Possible values: csv, gz.
+``--s3-region-name``         ``region_name``         Name of the bucket's region
+``--s3-access-key-id``       ``access_key_id``       AWS access key ID
+``--s3-secret-access-key``   ``secret_access_key``   AWS secret access key
 ``--s3-dest-key-split``      ``dest_key_split``      Indicates how to retrieve a blob name from a blob key (a blob key being the combination of a blob prefix and a blob name: <BLOB_PREFIX>/<BLOB_NAME>). The reader splits the blob key on the "/" character: the last element of the output list is considered as the blob name, and is used to name the stream produced by the reader. This option defines how many splits to do. Default: -1 (split on all occurrences).
 ``--s3-csv-delimiter``       ``csv_delimiter``       Delimiter that should be used to read the .csv file. Default: ,
 ``--s3-csv-fieldnames``      ``fieldnames``          List of field names. If set to None (default), the values in the first row of .csv file will be used as field names.
@@ -593,11 +588,10 @@ Follow these steps to set your credentials:

 - In your GCP project, create a Service Account with a 'Storage Object Viewer' role
 - Create a .JSON key for this Service Account, and download the key file locally
-- Execute the following commands:
+- Execute the following command:

 .. code-block:: shell

-    export project_id=<GCP project ID>
     export GCP_KEY_PATH=<Path to the Service Account key file>

 Once done, launch your Google Cloud Storage reader command. The following command retrieves the blobs located under the Google Cloud Storage bucket ``daily_reports`` and the blob prefix ``FR/offline_sales/``:
@@ -626,6 +620,7 @@ CMD Options JSON Options Definition
 ``--gcs-bucket``             ``bucket``              Cloud Storage bucket name
 ``--gcs-prefix``             ``prefix`` (list)       Cloud Storage blob prefix. Several prefixes can be provided in a single command.
 ``--gcs-format``             ``format``              Cloud Storage blob format. *Possible values: csv, gz*
+``--gcs-project-id``         ``project_id``          GCP project ID
 ``--gcs-dest-key-split``     ``dest_key_split``      Indicates how to retrieve a blob name from a blob key (a blob key being the combination of a blob prefix and a blob name: <BLOB_PREFIX>/<BLOB_NAME>). The reader splits the blob key on the "/" character: the last element of the output list is considered as the blob name, and is used to name the stream produced by the reader. This option defines how many splits to do. *Default: -1 (split on all occurrences)*
 ``--gcs-csv-delimiter``      ``csv_delimiter``       Delimiter that should be used to read the .csv file. *Default: ,*
 ``--gcs-csv-fieldnames``     ``csv_fieldnames``      List of field names. If set to *None* (*default*), the values in the first row of .csv file will be used as field names.
docs/source/writers.rst (1 addition, 0 deletions)

@@ -84,6 +84,7 @@ Command options
 CMD Options                    JSON Options           Definition
 ============================== ====================== =================================================================================================================================================
 ``--bq-dataset``               ``dataset``            BigQuery dataset name
+``--bq-project-id``            ``project_id``         GCP project ID
 ``--bq-table``                 ``table``              BigQuery table name
 ``--bq-write-disposition``     ``write_disposition``  BigQuery write disposition. Possible values: TRUNCATE (default), APPEND
 ``--bq-partition-column``      ``partition_column``   (Optional) Field to be used as a partition column (more information on `this page <https://cloud.google.com/bigquery/docs/partitioned-tables>`__)