Skip to content

Commit

Permalink
[infra] Melhora inicialização do CLI (#513)
Browse files Browse the repository at this point in the history
* auto copies staging if same as prod

* erase test string

* adds config init to CLI and erases overwrite_config

* modifica docs com novo comando do CLI

* bump to version 1.5.4

* small code refactoring

* revert csv refactoring

Co-authored-by: d116626 <[email protected]>
  • Loading branch information
JoaoCarabetta and d116626 authored Jun 28, 2021
1 parent 1e0426e commit c409a8d
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 87 deletions.
2 changes: 1 addition & 1 deletion docs/colab_data.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ Para criar um projeto no Google Cloud basta ter um email cadastrado no Google. S
No seu terminal:

- Instale nosso cliente: `pip install basedosdados`.
- Rode `basedosdados config` e siga o passo a passo para configurar localmente com as credenciais de seu projeto no Google Cloud.
- Rode `basedosdados config init` e siga o passo a passo para configurar localmente com as credenciais de seu projeto no Google Cloud.
- Clone um _fork_ do nosso [repositório](https://github.com/basedosdados/mais) localmente.
- Dê um `cd` para a pasta local do repositório e abra uma nova branch com `git checkout -b [BRANCH_ID]`.

Expand Down
11 changes: 8 additions & 3 deletions python-package/basedosdados/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,11 +653,16 @@ def cli_config():
pass


@cli_config.command(name="overwrite_cli_config", help="Overwrite current configuration")
@cli_config.command(name="init", help="Initialize configuration")
@click.option(
"--overwrite",
default=False,
help="Wheteher to overwrite current config",
)
@click.pass_context
def init_overwrite_cli_config(ctx):
def init(ctx, overwrite):

Base(overwrite_cli_config=True, **ctx.obj)
Base(overwrite_cli_config=overwrite, **ctx.obj)


@cli_config.command(name="refresh_template", help="Overwrite current templates")
Expand Down
55 changes: 27 additions & 28 deletions python-package/basedosdados/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,21 @@

def credentials(from_file=False, reauth=False):

if from_file:
return Base()._load_credentials(mode="prod")

SCOPES = [
"https://www.googleapis.com/auth/cloud-platform",
]

if from_file:
return Base()._load_credentials(mode="prod")

if reauth:
return pydata_google_auth.get_user_credentials(
SCOPES, credentials_cache=pydata_google_auth.cache.REAUTH
)
else:

if reauth:
return pydata_google_auth.get_user_credentials(
SCOPES, credentials_cache=pydata_google_auth.cache.REAUTH
)
else:
return pydata_google_auth.get_user_credentials(
SCOPES,
)
return pydata_google_auth.get_user_credentials(
SCOPES,
)


def download(
Expand Down Expand Up @@ -117,7 +115,7 @@ def download(
reauth=reauth,
)

elif query is None:
else:
raise BaseDosDadosException(
"Either table_id, dataset_id or query should be filled."
)
Expand Down Expand Up @@ -163,21 +161,22 @@ def read_sql(query, billing_project_id=None, from_file=False, reauth=False):
project_id=billing_project_id,
)
except (OSError, ValueError) as e:
msg = ( "\nWe are not sure which Google Cloud project should be billed.\n"
"First, you should make sure that you have a Google Cloud project.\n"
"If you don't have one, set one up following these steps: \n"
"\t1. Go to this link https://console.cloud.google.com/projectselector2/home/dashboard\n"
"\t2. Agree with Terms of Service if asked\n"
"\t3. Click in Create Project\n"
"\t4. Put a cool name in your project\n"
"\t5. Hit create\n"
"\n"
"Copy the Project ID, (notice that it is not the Project Name)\n"
"Now, you have two options:\n"
"1. Add an argument to your function poiting to the billing project id.\n"
" Like `bd.read_table('br_ibge_pib', 'municipios', billing_project_id=<YOUR_PROJECT_ID>)`\n"
"2. You can set a project_id in the environment by running the following command in your terminal: `gcloud config set project <YOUR_PROJECT_ID>`.\n"
" Bear in mind that you need `gcloud` installed."
msg = (
"\nWe are not sure which Google Cloud project should be billed.\n"
"First, you should make sure that you have a Google Cloud project.\n"
"If you don't have one, set one up following these steps: \n"
"\t1. Go to this link https://console.cloud.google.com/projectselector2/home/dashboard\n"
"\t2. Agree with Terms of Service if asked\n"
"\t3. Click in Create Project\n"
"\t4. Put a cool name in your project\n"
"\t5. Hit create\n"
"\n"
"Copy the Project ID, (notice that it is not the Project Name)\n"
"Now, you have two options:\n"
"1. Add an argument to your function poiting to the billing project id.\n"
" Like `bd.read_table('br_ibge_pib', 'municipios', billing_project_id=<YOUR_PROJECT_ID>)`\n"
"2. You can set a project_id in the environment by running the following command in your terminal: `gcloud config set project <YOUR_PROJECT_ID>`.\n"
" Bear in mind that you need `gcloud` installed."
)
raise BaseDosDadosException(msg) from e
except GenericGBQException as e:
Expand Down
12 changes: 9 additions & 3 deletions python-package/basedosdados/upload/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def _init_config(self, force):
)
c_file["gcloud-projects"]["staging"]["name"] = project_staging

############# STEP 5 - PROD CREDS. #######################
############# STEP 4 - PROD CREDS. #######################

project_prod = self._selection_yn(
first_question=(
Expand All @@ -227,8 +227,14 @@ def _init_config(self, force):
),
default_no="basedosdados",
)

self._check_credentials(project_prod, "prod", credentials_path)
# skip the credentials check when the prod project is the same as staging: reuse the staging credentials file
if project_prod == project_staging:
shutil.copy(
(credentials_path / "staging.json"),
(credentials_path / "prod.json"),
)
else:
self._check_credentials(project_prod, "prod", credentials_path)

c_file["gcloud-projects"]["prod"]["credentials_path"] = str(
credentials_path / "prod.json"
Expand Down
6 changes: 1 addition & 5 deletions python-package/basedosdados/upload/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,7 @@ def dataset_config(self):

def _loop_modes(self, mode="all"):

if mode == "all":
mode = ["prod", "staging"]
else:
mode = [mode]

mode = ["prod", "staging"] if mode == "all" else [mode]
dataset_tag = lambda m: f"_{m}" if m == "staging" else ""

return (
Expand Down
2 changes: 1 addition & 1 deletion python-package/basedosdados/upload/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ def external_config(self):

_external_config.hive_partitioning = self.partition()

return _external_config
return _external_config
67 changes: 22 additions & 45 deletions python-package/basedosdados/upload/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _resolve_partitions(self, partitions):

if isinstance(partitions, dict):

return "/".join([f"{k}={v}" for k, v in partitions.items()]) + "/"
return "/".join(f"{k}={v}" for k, v in partitions.items()) + "/"

elif isinstance(partitions, str):

Expand Down Expand Up @@ -172,11 +172,7 @@ def upload(

self._check_mode(mode)

if mode == "all":
mode = ["raw", "staging"]
else:
mode = [mode]

mode = ["raw", "staging"] if mode == "all" else [mode]
for m in mode:

for filepath, part in tqdm(list(zip(paths, parts)), desc="Uploading files"):
Expand All @@ -191,11 +187,7 @@ def upload(

blob.upload_from_filename(str(filepath), **upload_args)

elif if_exists == "pass":

pass

else:
elif if_exists != "pass":
raise Exception(
f"Data already exists at {self.bucket_name}/{blob_name}. "
"Set if_exists to 'replace' to overwrite data"
Expand Down Expand Up @@ -254,14 +246,10 @@ def download(
prefix += self._resolve_partitions(partitions)

# if no filename is passed, list all blobs within a given table
if filename == "*":
blob_list = list(self.bucket.list_blobs(prefix=prefix))

# if filename is passed, append it to the prefix to narrow the search
else:
if filename != "*":
prefix += filename

blob_list = list(self.bucket.list_blobs(prefix=prefix))
blob_list = list(self.bucket.list_blobs(prefix=prefix))

# if there are no blobs matching the search raise FileNotFoundError or return
if blob_list == []:
Expand Down Expand Up @@ -305,21 +293,15 @@ def delete_file(self, filename, mode, partitions=None, not_found_ok=False):

self._check_mode(mode)

if mode == "all":
mode = ["raw", "staging"]
else:
mode = [mode]

mode = ["raw", "staging"] if mode == "all" else [mode]
for m in mode:

blob = self.bucket.blob(self._build_blob_name(filename, m, partitions))

if blob.exists():
if blob.exists() or not blob.exists() and not not_found_ok:
blob.delete()
elif not_found_ok:
return
else:
blob.delete()
return

def delete_table(self, mode="staging", bucket_name=None, not_found_ok=False):
"""Deletes a table from storage, sends request in batches.
Expand Down Expand Up @@ -404,29 +386,24 @@ def copy_table(
f"Could not find the requested table {self.dataset_id}.{self.table_id}"
)

else:

if destination_bucket_name is None:
if destination_bucket_name is None:

destination_bucket = self.bucket
destination_bucket = self.bucket

else:
else:

destination_bucket = self.client["storage_staging"].bucket(
destination_bucket_name
)
destination_bucket = self.client["storage_staging"].bucket(
destination_bucket_name
)

# Divides source_table_ref list for maximum batch request size
source_table_ref_chunks = [
source_table_ref[i : i + 999]
for i in range(0, len(source_table_ref), 999)
]
# Divides source_table_ref list for maximum batch request size
source_table_ref_chunks = [
source_table_ref[i : i + 999] for i in range(0, len(source_table_ref), 999)
]

for source_table in source_table_ref_chunks:
for source_table in source_table_ref_chunks:

with self.client["storage_staging"].batch():
with self.client["storage_staging"].batch():

for blob in source_table:
self.bucket.copy_blob(
blob, destination_bucket=destination_bucket
)
for blob in source_table:
self.bucket.copy_blob(blob, destination_bucket=destination_bucket)
2 changes: 1 addition & 1 deletion python-package/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ packages = [
]
readme = "README.md"
repository = "https://github.com/base-dos-dados/bases"
version = "1.5.3"
version = "1.5.4"

[tool.poetry.scripts]
basedosdados = 'basedosdados.cli.cli:cli'
Expand Down

0 comments on commit c409a8d

Please sign in to comment.