From 1cb426cf87b7475df8e59adbc3d9860b48195e5d Mon Sep 17 00:00:00 2001 From: fatbuddy Date: Sat, 30 Mar 2024 13:27:54 -0700 Subject: [PATCH] Fix the styling issues caused by black lib --- dataprep/__init__.py | 1 + dataprep/clean/address_utils.py | 1 + dataprep/clean/clean_ad_nrt.py | 1 + dataprep/clean/clean_al_nipt.py | 1 + dataprep/clean/clean_ar_cbu.py | 1 + dataprep/clean/clean_ar_cuit.py | 1 + dataprep/clean/clean_ar_dni.py | 1 + dataprep/clean/clean_at_uid.py | 1 + dataprep/clean/clean_at_vnr.py | 1 + dataprep/clean/clean_au_abn.py | 1 + dataprep/clean/clean_au_acn.py | 1 + dataprep/clean/clean_au_tfn.py | 1 + dataprep/clean/clean_be_iban.py | 1 + dataprep/clean/clean_be_vat.py | 1 + dataprep/clean/clean_bg_egn.py | 1 + dataprep/clean/clean_bg_pnf.py | 1 + dataprep/clean/clean_bg_vat.py | 1 + dataprep/clean/clean_bic.py | 1 + dataprep/clean/clean_bitcoin.py | 1 + dataprep/clean/clean_br_cnpj.py | 1 + dataprep/clean/clean_br_cpf.py | 1 + dataprep/clean/clean_by_unp.py | 1 + dataprep/clean/clean_ca_bn.py | 1 + dataprep/clean/clean_ca_sin.py | 1 + dataprep/clean/clean_casrn.py | 1 + dataprep/clean/clean_ch_esr.py | 1 + dataprep/clean/clean_ch_ssn.py | 1 + dataprep/clean/clean_ch_uid.py | 1 + dataprep/clean/clean_ch_vat.py | 1 + dataprep/clean/clean_cl_rut.py | 1 + dataprep/clean/clean_cn_ric.py | 1 + dataprep/clean/clean_cn_uscc.py | 1 + dataprep/clean/clean_co_nit.py | 1 + dataprep/clean/clean_country.py | 5 +- dataprep/clean/clean_cr_cpf.py | 1 + dataprep/clean/clean_cr_cpj.py | 1 + dataprep/clean/clean_cr_cr.py | 1 + dataprep/clean/clean_cu_ni.py | 1 + dataprep/clean/clean_currency.py | 1 - dataprep/clean/clean_cusip.py | 1 + dataprep/clean/clean_cy_vat.py | 1 + dataprep/clean/clean_cz_dic.py | 1 + dataprep/clean/clean_cz_rc.py | 1 + dataprep/clean/clean_date.py | 1 + dataprep/clean/clean_date_utils.py | 3 +- .../clean/clean_de_handelsregisternummer.py | 1 + dataprep/clean/clean_de_idnr.py | 1 + dataprep/clean/clean_de_stnr.py | 1 + dataprep/clean/clean_de_vat.py | 1 + dataprep/clean/clean_de_wkn.py | 1 + dataprep/clean/clean_df.py | 1 + dataprep/clean/clean_df_gui.py | 1 + dataprep/clean/clean_dk_cpr.py | 1 + dataprep/clean/clean_dk_cvr.py | 1 + dataprep/clean/clean_do_cedula.py | 1 + dataprep/clean/clean_do_ncf.py | 1 + dataprep/clean/clean_do_rnc.py | 1 + dataprep/clean/clean_ean.py | 1 + dataprep/clean/clean_ec_ci.py | 1 + dataprep/clean/clean_ec_ruc.py | 1 + dataprep/clean/clean_ee_ik.py | 1 + dataprep/clean/clean_ee_kmkr.py | 1 + dataprep/clean/clean_ee_registrikood.py | 1 + dataprep/clean/clean_email.py | 1 + dataprep/clean/clean_es_ccc.py | 1 + dataprep/clean/clean_es_cif.py | 1 + dataprep/clean/clean_es_cups.py | 1 + dataprep/clean/clean_es_dni.py | 1 + dataprep/clean/clean_es_iban.py | 1 + dataprep/clean/clean_es_nie.py | 1 + dataprep/clean/clean_es_nif.py | 1 + .../clean/clean_es_referenciacatastral.py | 1 + dataprep/clean/clean_eu_at_02.py | 1 + dataprep/clean/clean_eu_banknote.py | 1 + dataprep/clean/clean_eu_eic.py | 1 + dataprep/clean/clean_eu_nace.py | 1 + dataprep/clean/clean_eu_vat.py | 1 + dataprep/clean/clean_fi_alv.py | 1 + dataprep/clean/clean_fi_associationid.py | 1 + dataprep/clean/clean_fi_hetu.py | 1 + dataprep/clean/clean_fi_veronumero.py | 1 + dataprep/clean/clean_fi_ytunnus.py | 1 + dataprep/clean/clean_figi.py | 1 + dataprep/clean/clean_fr_nif.py | 1 + dataprep/clean/clean_fr_nir.py | 1 + dataprep/clean/clean_fr_siren.py | 1 + dataprep/clean/clean_fr_siret.py | 1 + dataprep/clean/clean_fr_tva.py | 1 + dataprep/clean/clean_gb_nhs.py | 1 + dataprep/clean/clean_gb_sedol.py | 1 + dataprep/clean/clean_gb_upn.py | 1 + dataprep/clean/clean_gb_utr.py | 1 + dataprep/clean/clean_gb_vat.py | 1 + dataprep/clean/clean_gr_amka.py | 1 + dataprep/clean/clean_gr_vat.py | 1 + dataprep/clean/clean_grid.py | 1 + dataprep/clean/clean_gt_nit.py | 1 + dataprep/clean/clean_headers.py | 1 + dataprep/clean/clean_hr_oib.py | 1 + dataprep/clean/clean_hu_anum.py | 1 + dataprep/clean/clean_iban.py | 1 + dataprep/clean/clean_id_npwp.py | 1 + dataprep/clean/clean_ie_pps.py | 1 + dataprep/clean/clean_ie_vat.py | 1 + dataprep/clean/clean_il_hp.py | 1 + dataprep/clean/clean_il_idnr.py | 1 + dataprep/clean/clean_imei.py | 1 + dataprep/clean/clean_imo.py | 1 + dataprep/clean/clean_imsi.py | 1 + dataprep/clean/clean_in_aadhaar.py | 1 + dataprep/clean/clean_in_pan.py | 1 + dataprep/clean/clean_ip.py | 1 + dataprep/clean/clean_is_kennitala.py | 1 + dataprep/clean/clean_is_vsk.py | 1 + dataprep/clean/clean_isan.py | 1 + dataprep/clean/clean_isbn.py | 1 + dataprep/clean/clean_isil.py | 1 + dataprep/clean/clean_isin.py | 1 + dataprep/clean/clean_ismn.py | 1 + dataprep/clean/clean_issn.py | 1 + dataprep/clean/clean_it_aic.py | 1 + dataprep/clean/clean_it_codicefiscale.py | 1 + dataprep/clean/clean_it_iva.py | 1 + dataprep/clean/clean_jp_cn.py | 1 + dataprep/clean/clean_json.py | 1 + dataprep/clean/clean_kr_brn.py | 1 + dataprep/clean/clean_kr_rrn.py | 1 + dataprep/clean/clean_lat_long.py | 1 + dataprep/clean/clean_lei.py | 1 + dataprep/clean/clean_li_peid.py | 1 + dataprep/clean/clean_lt_asmens.py | 1 + dataprep/clean/clean_lt_pvm.py | 1 + dataprep/clean/clean_lu_tva.py | 1 + dataprep/clean/clean_lv_pvn.py | 1 + dataprep/clean/clean_mc_tva.py | 1 + dataprep/clean/clean_md_idno.py | 1 + dataprep/clean/clean_me_iban.py | 1 + dataprep/clean/clean_meid.py | 1 + dataprep/clean/clean_mt_vat.py | 1 + dataprep/clean/clean_mu_nid.py | 1 + dataprep/clean/clean_mx_curp.py | 1 + dataprep/clean/clean_mx_rfc.py | 1 + dataprep/clean/clean_my_nric.py | 1 + dataprep/clean/clean_nl_brin.py | 1 + dataprep/clean/clean_nl_bsn.py | 1 + dataprep/clean/clean_nl_btw.py | 1 + dataprep/clean/clean_nl_onderwijsnummer.py | 1 + dataprep/clean/clean_nl_postcode.py | 1 + dataprep/clean/clean_no_fodselsnummer.py | 1 + dataprep/clean/clean_no_iban.py | 1 + dataprep/clean/clean_no_kontonr.py | 1 + dataprep/clean/clean_no_mva.py | 1 + dataprep/clean/clean_no_orgnr.py | 1 + dataprep/clean/clean_nz_bankaccount.py | 1 + dataprep/clean/clean_nz_ird.py | 1 + dataprep/clean/clean_pe_cui.py | 1 + dataprep/clean/clean_pe_ruc.py | 1 + dataprep/clean/clean_phone.py | 1 + dataprep/clean/clean_pl_nip.py | 1 + dataprep/clean/clean_pl_pesel.py | 1 + dataprep/clean/clean_pl_regon.py | 1 + dataprep/clean/clean_pt_nif.py | 1 + dataprep/clean/clean_py_ruc.py | 1 + dataprep/clean/clean_ro_cf.py | 1 + dataprep/clean/clean_ro_cnp.py | 1 + dataprep/clean/clean_ro_cui.py | 1 + dataprep/clean/clean_ro_onrc.py | 1 + dataprep/clean/clean_text.py | 1 + dataprep/clean/clean_url.py | 1 + dataprep/clean/clean_vatin.py | 1 + dataprep/clean/components/cat_encoder.py | 1 - .../cat_imputation/constant_imputer.py | 1 - dataprep/clean/components/cat_imputer.py | 1 - dataprep/clean/components/num_imputer.py | 1 - dataprep/clean/gui/clean_gui.py | 1 + dataprep/clean/pipeline.py | 1 - dataprep/clean/utils.py | 1 + dataprep/connector/config_manager.py | 1 + dataprep/connector/connector.py | 1 + dataprep/connector/errors.py | 1 + dataprep/connector/generator/__init__.py | 1 + dataprep/connector/generator/ui.py | 9 +- dataprep/connector/implicit_database.py | 1 + dataprep/connector/info.py | 1 + dataprep/connector/info_ui.py | 1 + dataprep/connector/schema/defs.py | 1 + dataprep/connector/sql.py | 1 + dataprep/connector/throttler.py | 1 + dataprep/connector/utils.py | 1 + dataprep/eda/__init__.py | 1 + dataprep/eda/correlation/render.py | 2 + dataprep/eda/create_db_report/__init__.py | 1 + dataprep/eda/create_db_report/report.py | 1 + dataprep/eda/create_db_report/run_function.py | 16 +- dataprep/eda/create_diff_report/__init__.py | 1 + dataprep/eda/create_report/__init__.py | 1 + dataprep/eda/create_report/report.py | 1 + dataprep/eda/diff/render.py | 1 + dataprep/eda/distribution/compute/__init__.py | 1 - .../eda/distribution/compute/bivariate.py | 1 + dataprep/eda/distribution/compute/overview.py | 2 +- dataprep/eda/distribution/render.py | 1 + dataprep/eda/dtypes.py | 1 + dataprep/eda/dtypes_v2.py | 1 + dataprep/eda/intermediate.py | 1 + dataprep/eda/missing/compute/__init__.py | 1 + dataprep/eda/missing/compute/common.py | 1 + dataprep/eda/missing/compute/nullivariate.py | 1 + dataprep/eda/missing/compute/univariate.py | 1 + dataprep/eda/missing/render.py | 1 + dataprep/eda/outlier/computation.py | 1 - dataprep/eda/palette.py | 1 + dataprep/eda/utils.py | 1 + dataprep/lineage/lx.py | 1 + dataprep/progress_bar.py | 2 + dataprep/tests/benchmarks/eda.py | 1 + dataprep/tests/clean/test_clean_address.py | 1 + dataprep/tests/clean/test_clean_country.py | 1 + dataprep/tests/clean/test_clean_date.py | 1 + dataprep/tests/clean/test_clean_email.py | 1 + dataprep/tests/clean/test_clean_headers.py | 1 + dataprep/tests/clean/test_clean_lat_long.py | 1 + dataprep/tests/clean/test_clean_phone.py | 1 + dataprep/tests/clean/test_clean_text.py | 1 + dataprep/tests/datasets/test_datasets.py | 1 + dataprep/tests/eda/test_config.py | 1 + dataprep/tests/eda/test_create_diff_report.py | 1 + dataprep/tests/eda/test_create_report.py | 1 + dataprep/tests/eda/test_plot.py | 1 + dataprep/tests/eda/test_plot_correlation.py | 1 + dataprep/tests/eda/test_plot_diff.py | 1 + dataprep/tests/eda/test_plot_missing.py | 1 + dataprep/tests/lineage/test_lineagex.py | 13 +- dataprep/utils.py | 1 + docs/source/user_guide/eda/house_price.ipynb | 236 +++++++++--------- 235 files changed, 362 insertions(+), 152 deletions(-) diff --git a/dataprep/__init__.py b/dataprep/__init__.py index bfa987add..78f02e7a4 100644 --- a/dataprep/__init__.py +++ b/dataprep/__init__.py @@ -4,6 +4,7 @@ Dataprep let you prepare your data using a single library with a few lines of code. """ + import logging DEFAULT_PARTITIONS = 1 diff --git a/dataprep/clean/address_utils.py b/dataprep/clean/address_utils.py index b544b7b05..c4dbcb1b2 100644 --- a/dataprep/clean/address_utils.py +++ b/dataprep/clean/address_utils.py @@ -1,6 +1,7 @@ """ Constants used by the clean_address() and validate_address() functions """ + # pylint: disable=C0301, C0302, E1101 from builtins import zip diff --git a/dataprep/clean/clean_ad_nrt.py b/dataprep/clean/clean_ad_nrt.py index 67077c30b..8fb1660df 100644 --- a/dataprep/clean/clean_ad_nrt.py +++ b/dataprep/clean/clean_ad_nrt.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Andorra NRT (Número de Registre Tributari, Andorra tax number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_al_nipt.py b/dataprep/clean/clean_al_nipt.py index 3baf9b742..098a689ec 100644 --- a/dataprep/clean/clean_al_nipt.py +++ b/dataprep/clean/clean_al_nipt.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing NIPT (Numri i Identifikimit për Personin e Tatueshëm, Albanian VAT number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ar_cbu.py b/dataprep/clean/clean_ar_cbu.py index dafb4d829..8f182af7c 100644 --- a/dataprep/clean/clean_ar_cbu.py +++ b/dataprep/clean/clean_ar_cbu.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing CBU (Clave Bancaria Uniforme, Argentine bank account number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ar_cuit.py b/dataprep/clean/clean_ar_cuit.py index e7a6248de..7639fd2ea 100644 --- a/dataprep/clean/clean_ar_cuit.py +++ b/dataprep/clean/clean_ar_cuit.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing CUIT (Código Único de Identificación Tributaria, Argentinian tax number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ar_dni.py b/dataprep/clean/clean_ar_dni.py index 8e25a4fd3..26c869bfc 100644 --- a/dataprep/clean/clean_ar_dni.py +++ b/dataprep/clean/clean_ar_dni.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing DNI (Documento Nacional de Identidad, Argentinian national identity nr.). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_at_uid.py b/dataprep/clean/clean_at_uid.py index bf5e87c59..538da1f9a 100644 --- a/dataprep/clean/clean_at_uid.py +++ b/dataprep/clean/clean_at_uid.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing UID (Umsatzsteuer-Identifikationsnummer, Austrian VAT number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_at_vnr.py b/dataprep/clean/clean_at_vnr.py index a1941a913..a4367fd11 100644 --- a/dataprep/clean/clean_at_vnr.py +++ b/dataprep/clean/clean_at_vnr.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing VNR, SVNR, VSNR (Versicherungsnummer, Austrian social security number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_au_abn.py b/dataprep/clean/clean_au_abn.py index 0c8d94cbf..d0df4cb83 100644 --- a/dataprep/clean/clean_au_abn.py +++ b/dataprep/clean/clean_au_abn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Australian Business Numbers (ABNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_au_acn.py b/dataprep/clean/clean_au_acn.py index f2be29032..edf96e3e8 100644 --- a/dataprep/clean/clean_au_acn.py +++ b/dataprep/clean/clean_au_acn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Australian Company Numbers (ACNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_au_tfn.py b/dataprep/clean/clean_au_tfn.py index 48f49574e..b41304c7b 100644 --- a/dataprep/clean/clean_au_tfn.py +++ b/dataprep/clean/clean_au_tfn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Australian Tax File Numbers (TFNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_be_iban.py b/dataprep/clean/clean_be_iban.py index 39645cefc..21c8e6045 100644 --- a/dataprep/clean/clean_be_iban.py +++ b/dataprep/clean/clean_be_iban.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Belgian IBANs. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_be_vat.py b/dataprep/clean/clean_be_vat.py index 1650b5122..e7da65a02 100644 --- a/dataprep/clean/clean_be_vat.py +++ b/dataprep/clean/clean_be_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Belgian VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_bg_egn.py b/dataprep/clean/clean_bg_egn.py index f33dc5788..f4cdcbd03 100644 --- a/dataprep/clean/clean_bg_egn.py +++ b/dataprep/clean/clean_bg_egn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Bulgarian national identification numbers (EGNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_bg_pnf.py b/dataprep/clean/clean_bg_pnf.py index a6273ea02..f0ab8f341 100644 --- a/dataprep/clean/clean_bg_pnf.py +++ b/dataprep/clean/clean_bg_pnf.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Bulgarian personal number of a foreigner. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_bg_vat.py b/dataprep/clean/clean_bg_vat.py index 53ffbe8ae..4a8e6ed19 100644 --- a/dataprep/clean/clean_bg_vat.py +++ b/dataprep/clean/clean_bg_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Bulgarian VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_bic.py b/dataprep/clean/clean_bic.py index 68c02a267..4d2248b9a 100644 --- a/dataprep/clean/clean_bic.py +++ b/dataprep/clean/clean_bic.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing ISO 9362 Business identifier codes. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_bitcoin.py b/dataprep/clean/clean_bitcoin.py index 03882c9b8..6c621831f 100644 --- a/dataprep/clean/clean_bitcoin.py +++ b/dataprep/clean/clean_bitcoin.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Bitcoin Addresses. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_br_cnpj.py b/dataprep/clean/clean_br_cnpj.py index 7293c669b..1f5c32b4a 100644 --- a/dataprep/clean/clean_br_cnpj.py +++ b/dataprep/clean/clean_br_cnpj.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing CNPJ numbers, Brazilian company identifier. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_br_cpf.py b/dataprep/clean/clean_br_cpf.py index c8def16c8..721b3500f 100644 --- a/dataprep/clean/clean_br_cpf.py +++ b/dataprep/clean/clean_br_cpf.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing CPF numbers, Brazilian national identifier. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_by_unp.py b/dataprep/clean/clean_by_unp.py index 6908980e6..65209f84d 100644 --- a/dataprep/clean/clean_by_unp.py +++ b/dataprep/clean/clean_by_unp.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Belarusian UNP numbers (UNPs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ca_bn.py b/dataprep/clean/clean_ca_bn.py index 753e26236..e914eb812 100644 --- a/dataprep/clean/clean_ca_bn.py +++ b/dataprep/clean/clean_ca_bn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Canadian Business Numbers (BNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ca_sin.py b/dataprep/clean/clean_ca_sin.py index b66e5be49..9369f5308 100644 --- a/dataprep/clean/clean_ca_sin.py +++ b/dataprep/clean/clean_ca_sin.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Canadian Social Insurance Numbers(SINs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_casrn.py b/dataprep/clean/clean_casrn.py index 367703e47..ccf2240b9 100644 --- a/dataprep/clean/clean_casrn.py +++ b/dataprep/clean/clean_casrn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing CAS Registry Numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ch_esr.py b/dataprep/clean/clean_ch_esr.py index 4205e2a5c..9a5e117de 100644 --- a/dataprep/clean/clean_ch_esr.py +++ b/dataprep/clean/clean_ch_esr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Swiss EinzahlungsSchein mit Referenznummer (ESRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ch_ssn.py b/dataprep/clean/clean_ch_ssn.py index f54a72b05..66eab09a5 100644 --- a/dataprep/clean/clean_ch_ssn.py +++ b/dataprep/clean/clean_ch_ssn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Swiss social security numbers (SSNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ch_uid.py b/dataprep/clean/clean_ch_uid.py index b2f8d8a0f..b146b5454 100644 --- a/dataprep/clean/clean_ch_uid.py +++ b/dataprep/clean/clean_ch_uid.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Swiss business identifiers (UIDs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ch_vat.py b/dataprep/clean/clean_ch_vat.py index 129154879..5c1fe2408 100644 --- a/dataprep/clean/clean_ch_vat.py +++ b/dataprep/clean/clean_ch_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Swiss VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cl_rut.py b/dataprep/clean/clean_cl_rut.py index 427276575..687b09f3c 100644 --- a/dataprep/clean/clean_cl_rut.py +++ b/dataprep/clean/clean_cl_rut.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Chile RUT/RUN numbers (RUTs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cn_ric.py b/dataprep/clean/clean_cn_ric.py index 2ad35fb95..02ec02565 100644 --- a/dataprep/clean/clean_cn_ric.py +++ b/dataprep/clean/clean_cn_ric.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Chinese Resident Identity Card Number (RICs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cn_uscc.py b/dataprep/clean/clean_cn_uscc.py index e0e0743e8..38e6f473b 100644 --- a/dataprep/clean/clean_cn_uscc.py +++ b/dataprep/clean/clean_cn_uscc.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Chinese Unified Social Credit Code (China tax number) (USCCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_co_nit.py b/dataprep/clean/clean_co_nit.py index 1cfed81c8..c31b7204c 100644 --- a/dataprep/clean/clean_co_nit.py +++ b/dataprep/clean/clean_co_nit.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Colombian identity codes (NITs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_country.py b/dataprep/clean/clean_country.py index 3b749558b..13154bc11 100644 --- a/dataprep/clean/clean_country.py +++ b/dataprep/clean/clean_country.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing country names. """ + from functools import lru_cache from operator import itemgetter from os import path @@ -371,9 +372,7 @@ def _get_format_if_allowed(input_format: str, allowed_formats: Tuple[str, ...]) return ( "name" if "name" in allowed_formats - else "official" - if "official" in allowed_formats - else None + else "official" if "official" in allowed_formats else None ) return input_format if input_format in allowed_formats else None diff --git a/dataprep/clean/clean_cr_cpf.py b/dataprep/clean/clean_cr_cpf.py index 4e9ff34a4..88e863417 100644 --- a/dataprep/clean/clean_cr_cpf.py +++ b/dataprep/clean/clean_cr_cpf.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Costa Rica physical person ID number (CPFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cr_cpj.py b/dataprep/clean/clean_cr_cpj.py index 22ab4fa93..7d3e22895 100644 --- a/dataprep/clean/clean_cr_cpj.py +++ b/dataprep/clean/clean_cr_cpj.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Costa Rica tax number (CPJs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cr_cr.py b/dataprep/clean/clean_cr_cr.py index a57d40502..7ee1c6fe0 100644 --- a/dataprep/clean/clean_cr_cr.py +++ b/dataprep/clean/clean_cr_cr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Costa Rica foreigners ID number (CRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cu_ni.py b/dataprep/clean/clean_cu_ni.py index 6184b93ad..085dd9786 100644 --- a/dataprep/clean/clean_cu_ni.py +++ b/dataprep/clean/clean_cu_ni.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Cuban identity card numbers (NIs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_currency.py b/dataprep/clean/clean_currency.py index d12baf401..cd33d22b0 100644 --- a/dataprep/clean/clean_currency.py +++ b/dataprep/clean/clean_currency.py @@ -50,7 +50,6 @@ def clean_currency( report: bool = True, progress: bool = False, ) -> Union[pd.DataFrame, dd.DataFrame]: - """ Clean, standardize and convert currencies. diff --git a/dataprep/clean/clean_cusip.py b/dataprep/clean/clean_cusip.py index 233eec2c9..1a9148bea 100644 --- a/dataprep/clean/clean_cusip.py +++ b/dataprep/clean/clean_cusip.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing CUSIP numbers (financial security identification number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cy_vat.py b/dataprep/clean/clean_cy_vat.py index 28b269aac..15c6cc72a 100644 --- a/dataprep/clean/clean_cy_vat.py +++ b/dataprep/clean/clean_cy_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Cypriot VAT number (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cz_dic.py b/dataprep/clean/clean_cz_dic.py index 81a5ea73b..0365849fb 100644 --- a/dataprep/clean/clean_cz_dic.py +++ b/dataprep/clean/clean_cz_dic.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Czech VAT number (DICs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_cz_rc.py b/dataprep/clean/clean_cz_rc.py index 5d5e42783..abe130b7e 100644 --- a/dataprep/clean/clean_cz_rc.py +++ b/dataprep/clean/clean_cz_rc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Czech birth numbers (RCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_date.py b/dataprep/clean/clean_date.py index 407a57ad1..9b8001608 100644 --- a/dataprep/clean/clean_date.py +++ b/dataprep/clean/clean_date.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing dates and times. """ + # pylint: disable=too-many-lines import datetime from copy import deepcopy diff --git a/dataprep/clean/clean_date_utils.py b/dataprep/clean/clean_date_utils.py index 05cf2c3b3..007a9af25 100644 --- a/dataprep/clean/clean_date_utils.py +++ b/dataprep/clean/clean_date_utils.py @@ -1,6 +1,7 @@ """ Common definitions and classes for the clean_date function. """ + # pylint: disable-msg=too-many-branches import datetime @@ -200,7 +201,6 @@ class ParsedDate: - """Attributes of a parsed date. Attributes: year: Value of year. @@ -388,7 +388,6 @@ def _is_leap_year(self) -> bool: class ParsedTargetFormat: - """Attributes of a parsed target format. Attributes: year_token: Token standing of year. diff --git a/dataprep/clean/clean_de_handelsregisternummer.py b/dataprep/clean/clean_de_handelsregisternummer.py index d2a2d7487..2848f37f1 100644 --- a/dataprep/clean/clean_de_handelsregisternummer.py +++ b/dataprep/clean/clean_de_handelsregisternummer.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing German company registry id (handelsregisternummer). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_de_idnr.py b/dataprep/clean/clean_de_idnr.py index 95184fbeb..ddb647271 100644 --- a/dataprep/clean/clean_de_idnr.py +++ b/dataprep/clean/clean_de_idnr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing German personal tax number (IDNRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_de_stnr.py b/dataprep/clean/clean_de_stnr.py index 46b3f80f6..2be32e65c 100644 --- a/dataprep/clean/clean_de_stnr.py +++ b/dataprep/clean/clean_de_stnr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing German tax numbers (STNRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from typing import Optional diff --git a/dataprep/clean/clean_de_vat.py b/dataprep/clean/clean_de_vat.py index ca981ebcb..2dc371d2e 100644 --- a/dataprep/clean/clean_de_vat.py +++ b/dataprep/clean/clean_de_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing German VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_de_wkn.py b/dataprep/clean/clean_de_wkn.py index bb73daebe..2f6ece082 100644 --- a/dataprep/clean/clean_de_wkn.py +++ b/dataprep/clean/clean_de_wkn.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing German Securities Identification Codes (WKNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_df.py b/dataprep/clean/clean_df.py index 85c36b8a1..b275076e8 100644 --- a/dataprep/clean/clean_df.py +++ b/dataprep/clean/clean_df.py @@ -2,6 +2,7 @@ Conduct a set of operations that would be useful for cleaning and standardizing a full Pandas DataFrame. """ + # pylint: disable-msg=relative-beyond-top-level # pylint: disable-msg=cyclic-import diff --git a/dataprep/clean/clean_df_gui.py b/dataprep/clean/clean_df_gui.py index 7181856f0..222246a10 100644 --- a/dataprep/clean/clean_df_gui.py +++ b/dataprep/clean/clean_df_gui.py @@ -2,6 +2,7 @@ Conduct a set of operations that would be useful for cleaning and standardizing a full Pandas DataFrame. """ + # pylint: disable-msg=relative-beyond-top-level # pylint: disable-msg=cyclic-import # type: ignore diff --git a/dataprep/clean/clean_dk_cpr.py b/dataprep/clean/clean_dk_cpr.py index 07f8cde1b..30454b195 100644 --- a/dataprep/clean/clean_dk_cpr.py +++ b/dataprep/clean/clean_dk_cpr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Danish citizen number (CPRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_dk_cvr.py b/dataprep/clean/clean_dk_cvr.py index 7167a1053..5f4ed0346 100644 --- a/dataprep/clean/clean_dk_cvr.py +++ b/dataprep/clean/clean_dk_cvr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Danish CVR number (CVRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_do_cedula.py b/dataprep/clean/clean_do_cedula.py index 475f0657e..38f7f8d86 100644 --- a/dataprep/clean/clean_do_cedula.py +++ b/dataprep/clean/clean_do_cedula.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Dominican Republic national identifier (Cedulas). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_do_ncf.py b/dataprep/clean/clean_do_ncf.py index 48eb9ffed..0a22680dd 100644 --- a/dataprep/clean/clean_do_ncf.py +++ b/dataprep/clean/clean_do_ncf.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Dominican Republic invoice numbers (NCFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_do_rnc.py b/dataprep/clean/clean_do_rnc.py index db014131d..1d7a2a91d 100644 --- a/dataprep/clean/clean_do_rnc.py +++ b/dataprep/clean/clean_do_rnc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Dominican Republic tax registration (RNCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ean.py b/dataprep/clean/clean_ean.py index 2d696db78..65987d089 100644 --- a/dataprep/clean/clean_ean.py +++ b/dataprep/clean/clean_ean.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing EAN (International Article Number). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ec_ci.py b/dataprep/clean/clean_ec_ci.py index d955dde47..f54c85b22 100644 --- a/dataprep/clean/clean_ec_ci.py +++ b/dataprep/clean/clean_ec_ci.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Ecuadorian personal identity codes (CIs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ec_ruc.py b/dataprep/clean/clean_ec_ruc.py index 00bdb1486..9c00259d5 100644 --- a/dataprep/clean/clean_ec_ruc.py +++ b/dataprep/clean/clean_ec_ruc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Ecuadorian company tax number (RUCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ee_ik.py b/dataprep/clean/clean_ee_ik.py index c30e0825f..1d10a47af 100644 --- a/dataprep/clean/clean_ee_ik.py +++ b/dataprep/clean/clean_ee_ik.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Estonian Personcal ID numbers (IKs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ee_kmkr.py b/dataprep/clean/clean_ee_kmkr.py index 59ee88160..e5bbb83ee 100644 --- a/dataprep/clean/clean_ee_kmkr.py +++ b/dataprep/clean/clean_ee_kmkr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Estonian KMKR numbers (KMKRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ee_registrikood.py b/dataprep/clean/clean_ee_registrikood.py index a7f236905..b09681917 100644 --- a/dataprep/clean/clean_ee_registrikood.py +++ b/dataprep/clean/clean_ee_registrikood.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Estonian organisation registration codes. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_email.py b/dataprep/clean/clean_email.py index 77a362a6c..82be5964e 100644 --- a/dataprep/clean/clean_email.py +++ b/dataprep/clean/clean_email.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing email addresses. """ + import re from operator import itemgetter from typing import Any, Union diff --git a/dataprep/clean/clean_es_ccc.py b/dataprep/clean/clean_es_ccc.py index 4c31bb67c..5f3e1cb5f 100644 --- a/dataprep/clean/clean_es_ccc.py +++ b/dataprep/clean/clean_es_ccc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish Bank Account Codes (CCCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_cif.py b/dataprep/clean/clean_es_cif.py index cecbd9166..53cdd54e5 100644 --- a/dataprep/clean/clean_es_cif.py +++ b/dataprep/clean/clean_es_cif.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish fiscal numbers (CIFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_cups.py b/dataprep/clean/clean_es_cups.py index 0d7c92ae0..f6044c352 100644 --- a/dataprep/clean/clean_es_cups.py +++ b/dataprep/clean/clean_es_cups.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish meter point numbers (CUPSs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_dni.py b/dataprep/clean/clean_es_dni.py index 96d65dd57..f857425e1 100644 --- a/dataprep/clean/clean_es_dni.py +++ b/dataprep/clean/clean_es_dni.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish personal identity codes (DNIs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_iban.py b/dataprep/clean/clean_es_iban.py index d186854c7..4f9f725ab 100644 --- a/dataprep/clean/clean_es_iban.py +++ b/dataprep/clean/clean_es_iban.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish IBANs (IBANs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_nie.py b/dataprep/clean/clean_es_nie.py index dfa674ba8..a15b571c1 100644 --- a/dataprep/clean/clean_es_nie.py +++ b/dataprep/clean/clean_es_nie.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish foreigner identity codes (NIEs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_nif.py b/dataprep/clean/clean_es_nif.py index ce020f503..c244256a5 100644 --- a/dataprep/clean/clean_es_nif.py +++ b/dataprep/clean/clean_es_nif.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish NIF numbers (NIFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_es_referenciacatastral.py b/dataprep/clean/clean_es_referenciacatastral.py index b52fb1391..99124d892 100644 --- a/dataprep/clean/clean_es_referenciacatastral.py +++ b/dataprep/clean/clean_es_referenciacatastral.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Spanish real state ids. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_eu_at_02.py b/dataprep/clean/clean_eu_at_02.py index 7cf650fa8..b4ce19dff 100644 --- a/dataprep/clean/clean_eu_at_02.py +++ b/dataprep/clean/clean_eu_at_02.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing AT-02 (SEPA Creditor identifier). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_eu_banknote.py b/dataprep/clean/clean_eu_banknote.py index cdaed11fa..18b8a1532 100644 --- a/dataprep/clean/clean_eu_banknote.py +++ b/dataprep/clean/clean_eu_banknote.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Euro banknote serial numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_eu_eic.py b/dataprep/clean/clean_eu_eic.py index b820f13fb..97f7767d2 100644 --- a/dataprep/clean/clean_eu_eic.py +++ b/dataprep/clean/clean_eu_eic.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing European Energy Identification Codes. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_eu_nace.py b/dataprep/clean/clean_eu_nace.py index eb4ab0e8f..e3b4387a0 100644 --- a/dataprep/clean/clean_eu_nace.py +++ b/dataprep/clean/clean_eu_nace.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing classification for businesses in the European Union (NACE). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_eu_vat.py b/dataprep/clean/clean_eu_vat.py index a68b93f6a..37c2c865a 100644 --- a/dataprep/clean/clean_eu_vat.py +++ b/dataprep/clean/clean_eu_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing European VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fi_alv.py b/dataprep/clean/clean_fi_alv.py index c649fa03e..a7beb0390 100644 --- a/dataprep/clean/clean_fi_alv.py +++ b/dataprep/clean/clean_fi_alv.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Finnish ALV numbers (ALVs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fi_associationid.py b/dataprep/clean/clean_fi_associationid.py index cebbb33a6..913a8947b 100644 --- a/dataprep/clean/clean_fi_associationid.py +++ b/dataprep/clean/clean_fi_associationid.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Finnish association registry ids. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fi_hetu.py b/dataprep/clean/clean_fi_hetu.py index 3181c160e..4420c092d 100644 --- a/dataprep/clean/clean_fi_hetu.py +++ b/dataprep/clean/clean_fi_hetu.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Finnish personal identity codes (HETUs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fi_veronumero.py b/dataprep/clean/clean_fi_veronumero.py index 7992cdaf2..46767fc98 100644 --- a/dataprep/clean/clean_fi_veronumero.py +++ b/dataprep/clean/clean_fi_veronumero.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Finnish individual tax numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fi_ytunnus.py b/dataprep/clean/clean_fi_ytunnus.py index 2ec9fb5fc..9f8366f54 100644 --- a/dataprep/clean/clean_fi_ytunnus.py +++ b/dataprep/clean/clean_fi_ytunnus.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Finnish business identifiers (y-tunnus). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_figi.py b/dataprep/clean/clean_figi.py index 141660dbb..085f51621 100644 --- a/dataprep/clean/clean_figi.py +++ b/dataprep/clean/clean_figi.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing FIGI (Financial Instrument Global Identifier) Numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fr_nif.py b/dataprep/clean/clean_fr_nif.py index 61843b2f3..bc374a4d0 100644 --- a/dataprep/clean/clean_fr_nif.py +++ b/dataprep/clean/clean_fr_nif.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing French tax identification numbers (NIFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fr_nir.py b/dataprep/clean/clean_fr_nir.py index cb2851a42..50859c37b 100644 --- a/dataprep/clean/clean_fr_nir.py +++ b/dataprep/clean/clean_fr_nir.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing French personal identification numbers (NIRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fr_siren.py b/dataprep/clean/clean_fr_siren.py index 63fe560db..1e241add0 100644 --- a/dataprep/clean/clean_fr_siren.py +++ b/dataprep/clean/clean_fr_siren.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing French company identification numbers (SIRENs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fr_siret.py b/dataprep/clean/clean_fr_siret.py index f51f37759..5b366c151 100644 --- a/dataprep/clean/clean_fr_siret.py +++ b/dataprep/clean/clean_fr_siret.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing French company establishment identification numbers (SIRETs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_fr_tva.py b/dataprep/clean/clean_fr_tva.py index 6c886eb54..5420891da 100644 --- a/dataprep/clean/clean_fr_tva.py +++ b/dataprep/clean/clean_fr_tva.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing French TVA numbers (TVAs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gb_nhs.py b/dataprep/clean/clean_gb_nhs.py index ea372440a..e4e0b713b 100644 --- a/dataprep/clean/clean_gb_nhs.py +++ b/dataprep/clean/clean_gb_nhs.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing United Kingdom National Health Service patient identifier (NHSs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gb_sedol.py b/dataprep/clean/clean_gb_sedol.py index cec331386..d77858a7a 100644 --- a/dataprep/clean/clean_gb_sedol.py +++ b/dataprep/clean/clean_gb_sedol.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Stock Exchange Daily Official List numbers (SEDOLs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gb_upn.py b/dataprep/clean/clean_gb_upn.py index 8a8645b37..3123f64d0 100644 --- a/dataprep/clean/clean_gb_upn.py +++ b/dataprep/clean/clean_gb_upn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing English Unique Pupil Numbers (UPNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gb_utr.py b/dataprep/clean/clean_gb_utr.py index ef5f3145b..127869520 100644 --- a/dataprep/clean/clean_gb_utr.py +++ b/dataprep/clean/clean_gb_utr.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing United Kingdom Unique Taxpayer Reference (UTRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gb_vat.py b/dataprep/clean/clean_gb_vat.py index 6985a6e1b..341e7c5db 100644 --- a/dataprep/clean/clean_gb_vat.py +++ b/dataprep/clean/clean_gb_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing United Kingdom VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gr_amka.py b/dataprep/clean/clean_gr_amka.py index ad87401ba..959856a17 100644 --- a/dataprep/clean/clean_gr_amka.py +++ b/dataprep/clean/clean_gr_amka.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Greek social security numbers (AMKAs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gr_vat.py b/dataprep/clean/clean_gr_vat.py index 86edd91a4..d3aff0aa7 100644 --- a/dataprep/clean/clean_gr_vat.py +++ b/dataprep/clean/clean_gr_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Greek VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_grid.py b/dataprep/clean/clean_grid.py index 498df3e56..e3af79158 100644 --- a/dataprep/clean/clean_grid.py +++ b/dataprep/clean/clean_grid.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Global Release Identifier (GRid) numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_gt_nit.py b/dataprep/clean/clean_gt_nit.py index 0bcb0b021..d0cf28acb 100644 --- a/dataprep/clean/clean_gt_nit.py +++ b/dataprep/clean/clean_gt_nit.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Guatemala tax numbers (NITs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_headers.py b/dataprep/clean/clean_headers.py index fe13e5bf0..54e64b35f 100644 --- a/dataprep/clean/clean_headers.py +++ b/dataprep/clean/clean_headers.py @@ -1,6 +1,7 @@ """ Clean and standardize column headers for a DataFrame. """ + import re from typing import Any, Dict, List, Optional, Union from unicodedata import normalize diff --git a/dataprep/clean/clean_hr_oib.py b/dataprep/clean/clean_hr_oib.py index e99894a12..09da24f8f 100644 --- a/dataprep/clean/clean_hr_oib.py +++ b/dataprep/clean/clean_hr_oib.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Croatian identification numbers (OIBs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_hu_anum.py b/dataprep/clean/clean_hu_anum.py index 6bb07c1b1..0f9553346 100644 --- a/dataprep/clean/clean_hu_anum.py +++ b/dataprep/clean/clean_hu_anum.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Hungarian ANUM numbers (ANUMs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_iban.py b/dataprep/clean/clean_iban.py index 8870165d0..4968daa64 100644 --- a/dataprep/clean/clean_iban.py +++ b/dataprep/clean/clean_iban.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing IBAN numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_id_npwp.py b/dataprep/clean/clean_id_npwp.py index 5b7e7a2ca..a3af66196 100644 --- a/dataprep/clean/clean_id_npwp.py +++ b/dataprep/clean/clean_id_npwp.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Indonesian VAT Numbers (NPWPs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ie_pps.py b/dataprep/clean/clean_ie_pps.py index dbcc8bc4f..8f16f90c1 100644 --- a/dataprep/clean/clean_ie_pps.py +++ b/dataprep/clean/clean_ie_pps.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Irish personal numbers (PPSs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ie_vat.py b/dataprep/clean/clean_ie_vat.py index 8e0b6da03..7c533c6b1 100644 --- a/dataprep/clean/clean_ie_vat.py +++ b/dataprep/clean/clean_ie_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Irish VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_il_hp.py b/dataprep/clean/clean_il_hp.py index e12a36e8e..f70a3e576 100644 --- a/dataprep/clean/clean_il_hp.py +++ b/dataprep/clean/clean_il_hp.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Israeli company numbers (HPs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_il_idnr.py b/dataprep/clean/clean_il_idnr.py index 8d4c68cab..6b17c6d7f 100644 --- a/dataprep/clean/clean_il_idnr.py +++ b/dataprep/clean/clean_il_idnr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Israeli personal numbers (IDNRs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_imei.py b/dataprep/clean/clean_imei.py index 7c3750e3f..957e8bb07 100644 --- a/dataprep/clean/clean_imei.py +++ b/dataprep/clean/clean_imei.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Mobile Equipment Identity (IMEI) numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_imo.py b/dataprep/clean/clean_imo.py index 4cff00534..dc295886e 100644 --- a/dataprep/clean/clean_imo.py +++ b/dataprep/clean/clean_imo.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Maritime Organization Numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_imsi.py b/dataprep/clean/clean_imsi.py index 13a0522aa..202249afd 100644 --- a/dataprep/clean/clean_imsi.py +++ b/dataprep/clean/clean_imsi.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Mobile Subscriber Identity (IMSI) numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_in_aadhaar.py b/dataprep/clean/clean_in_aadhaar.py index ecd86f9aa..28dd42000 100644 --- a/dataprep/clean/clean_in_aadhaar.py +++ b/dataprep/clean/clean_in_aadhaar.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Indian digital resident personal identity numbers (Aadhaars). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_in_pan.py b/dataprep/clean/clean_in_pan.py index 218ff933d..1156e1db2 100644 --- a/dataprep/clean/clean_in_pan.py +++ b/dataprep/clean/clean_in_pan.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Indian Permanent Account numbers (PANs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ip.py b/dataprep/clean/clean_ip.py index bd03f10bb..3bebd6274 100644 --- a/dataprep/clean/clean_ip.py +++ b/dataprep/clean/clean_ip.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing IP addresses. """ + from ipaddress import ip_address from operator import itemgetter from typing import Any, Union diff --git a/dataprep/clean/clean_is_kennitala.py b/dataprep/clean/clean_is_kennitala.py index ddf9701b9..8069882b8 100644 --- a/dataprep/clean/clean_is_kennitala.py +++ b/dataprep/clean/clean_is_kennitala.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Icelandic identity codes (Kennitalas). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_is_vsk.py b/dataprep/clean/clean_is_vsk.py index 74d769c79..02ea1e083 100644 --- a/dataprep/clean/clean_is_vsk.py +++ b/dataprep/clean/clean_is_vsk.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Icelandic VSK numbers (VSKs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_isan.py b/dataprep/clean/clean_isan.py index 0e6c1cc77..cd47d67d3 100644 --- a/dataprep/clean/clean_isan.py +++ b/dataprep/clean/clean_isan.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Standard Audiovisual Numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_isbn.py b/dataprep/clean/clean_isbn.py index c9d346c98..11b94e14e 100644 --- a/dataprep/clean/clean_isbn.py +++ b/dataprep/clean/clean_isbn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing ISBN numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_isil.py b/dataprep/clean/clean_isil.py index f5fd5793f..340a2bd67 100644 --- a/dataprep/clean/clean_isil.py +++ b/dataprep/clean/clean_isil.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Standard Identifier for Libraries (ISIL) numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_isin.py b/dataprep/clean/clean_isin.py index 4eb4b09da..42696148e 100644 --- a/dataprep/clean/clean_isin.py +++ b/dataprep/clean/clean_isin.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Securities Identification Number (ISIN) numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ismn.py b/dataprep/clean/clean_ismn.py index d8624da7a..0f7c15ad3 100644 --- a/dataprep/clean/clean_ismn.py +++ b/dataprep/clean/clean_ismn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing ISMN numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_issn.py b/dataprep/clean/clean_issn.py index 21d217249..8c53bcce5 100644 --- a/dataprep/clean/clean_issn.py +++ b/dataprep/clean/clean_issn.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International Standard Serial Numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_it_aic.py b/dataprep/clean/clean_it_aic.py index 8b02d0adf..40c6705f0 100644 --- a/dataprep/clean/clean_it_aic.py +++ b/dataprep/clean/clean_it_aic.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Italian code for identification of drugs (AICs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_it_codicefiscale.py b/dataprep/clean/clean_it_codicefiscale.py index 0ade2ab3c..30ca03347 100644 --- a/dataprep/clean/clean_it_codicefiscale.py +++ b/dataprep/clean/clean_it_codicefiscale.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Italian fiscal codes (Codice Fiscales). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_it_iva.py b/dataprep/clean/clean_it_iva.py index 776330a84..4bdb3a10b 100644 --- a/dataprep/clean/clean_it_iva.py +++ b/dataprep/clean/clean_it_iva.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Italian IVA numbers (IVAs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_jp_cn.py b/dataprep/clean/clean_jp_cn.py index ab952d597..466db4b91 100644 --- a/dataprep/clean/clean_jp_cn.py +++ b/dataprep/clean/clean_jp_cn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Japanese Corporate Numbers (CNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_json.py b/dataprep/clean/clean_json.py index cd0d2a945..b3d999405 100644 --- a/dataprep/clean/clean_json.py +++ b/dataprep/clean/clean_json.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing JSON. """ + from typing import Any, Union import json diff --git a/dataprep/clean/clean_kr_brn.py b/dataprep/clean/clean_kr_brn.py index 633183262..0886b61c5 100644 --- a/dataprep/clean/clean_kr_brn.py +++ b/dataprep/clean/clean_kr_brn.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing South Korea Business Registration Numbers (BRNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_kr_rrn.py b/dataprep/clean/clean_kr_rrn.py index cd70a0cc1..7c4d86b45 100644 --- a/dataprep/clean/clean_kr_rrn.py +++ b/dataprep/clean/clean_kr_rrn.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing South Korean resident registration numbers (RRNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_lat_long.py b/dataprep/clean/clean_lat_long.py index ec97e793f..1d1a4bfec 100644 --- a/dataprep/clean/clean_lat_long.py +++ b/dataprep/clean/clean_lat_long.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing geographic coordinates. """ + import re from operator import itemgetter from typing import Any, Optional, Tuple, Union diff --git a/dataprep/clean/clean_lei.py b/dataprep/clean/clean_lei.py index 423f28522..81c01051f 100644 --- a/dataprep/clean/clean_lei.py +++ b/dataprep/clean/clean_lei.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Legal Entity Identifier (LEI) Numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_li_peid.py b/dataprep/clean/clean_li_peid.py index 6ff2156ce..82f52b0d0 100644 --- a/dataprep/clean/clean_li_peid.py +++ b/dataprep/clean/clean_li_peid.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Liechtenstein tax code for individuals and entities (PEIDs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_lt_asmens.py b/dataprep/clean/clean_lt_asmens.py index 5b3a7a4d6..ab5111b8e 100644 --- a/dataprep/clean/clean_lt_asmens.py +++ b/dataprep/clean/clean_lt_asmens.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Lithuanian personal numbers (Asmens kodas). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_lt_pvm.py b/dataprep/clean/clean_lt_pvm.py index 4e3caeacb..78b3546f3 100644 --- a/dataprep/clean/clean_lt_pvm.py +++ b/dataprep/clean/clean_lt_pvm.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Lithuanian PVM numbers (PVMs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_lu_tva.py b/dataprep/clean/clean_lu_tva.py index dc6a0eb52..aa8790947 100644 --- a/dataprep/clean/clean_lu_tva.py +++ b/dataprep/clean/clean_lu_tva.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Luxembourgian TVA numbers (TVAs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_lv_pvn.py b/dataprep/clean/clean_lv_pvn.py index 7122cdaae..994f872cf 100644 --- a/dataprep/clean/clean_lv_pvn.py +++ b/dataprep/clean/clean_lv_pvn.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Latvian PVN (VAT) numbers (PVNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_mc_tva.py b/dataprep/clean/clean_mc_tva.py index 8521617c4..e8cb09aeb 100644 --- a/dataprep/clean/clean_mc_tva.py +++ b/dataprep/clean/clean_mc_tva.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Monacan TVA numbers (TVAs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_md_idno.py b/dataprep/clean/clean_md_idno.py index 4b815bdb9..6a88f4151 100644 --- a/dataprep/clean/clean_md_idno.py +++ b/dataprep/clean/clean_md_idno.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Moldavian company identification numbers (IDNOs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_me_iban.py b/dataprep/clean/clean_me_iban.py index 3834e014b..12615446e 100644 --- a/dataprep/clean/clean_me_iban.py +++ b/dataprep/clean/clean_me_iban.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Montenegro IBANs (IBANs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_meid.py b/dataprep/clean/clean_meid.py index 120be26a8..5a9cf7da3 100644 --- a/dataprep/clean/clean_meid.py +++ b/dataprep/clean/clean_meid.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Mobile Equipment Identifiers (MEIDs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_mt_vat.py b/dataprep/clean/clean_mt_vat.py index ac10880d7..da436ebd9 100644 --- a/dataprep/clean/clean_mt_vat.py +++ b/dataprep/clean/clean_mt_vat.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Maltese VAT numbers (VATs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_mu_nid.py b/dataprep/clean/clean_mu_nid.py index c1b9dae8b..e84e4deea 100644 --- a/dataprep/clean/clean_mu_nid.py +++ b/dataprep/clean/clean_mu_nid.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Mauritian national ID numbers (NIDs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_mx_curp.py b/dataprep/clean/clean_mx_curp.py index 8695d2190..f0b308462 100644 --- a/dataprep/clean/clean_mx_curp.py +++ b/dataprep/clean/clean_mx_curp.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Mexican personal identifiers (CURPs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_mx_rfc.py b/dataprep/clean/clean_mx_rfc.py index 2c6369c9e..8a2f75b25 100644 --- a/dataprep/clean/clean_mx_rfc.py +++ b/dataprep/clean/clean_mx_rfc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Mexican tax numbers (RFCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_my_nric.py b/dataprep/clean/clean_my_nric.py index 66bc96f2a..201b0659c 100644 --- a/dataprep/clean/clean_my_nric.py +++ b/dataprep/clean/clean_my_nric.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Malaysian National Registration Identity Card Numbers (NRICs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nl_brin.py b/dataprep/clean/clean_nl_brin.py index 609d919b0..7869b7122 100644 --- a/dataprep/clean/clean_nl_brin.py +++ b/dataprep/clean/clean_nl_brin.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Brin numbers (BRINs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nl_bsn.py b/dataprep/clean/clean_nl_bsn.py index b0223375f..96604bfd9 100644 --- a/dataprep/clean/clean_nl_bsn.py +++ b/dataprep/clean/clean_nl_bsn.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Burgerservicenummer, the Dutch citizen identification numbers (BSNs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nl_btw.py b/dataprep/clean/clean_nl_btw.py index b00a0e284..dd83b87b0 100644 --- a/dataprep/clean/clean_nl_btw.py +++ b/dataprep/clean/clean_nl_btw.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Dutch BTW numbers (BTWs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nl_onderwijsnummer.py b/dataprep/clean/clean_nl_onderwijsnummer.py index b5f15f6ef..038db0cde 100644 --- a/dataprep/clean/clean_nl_onderwijsnummer.py +++ b/dataprep/clean/clean_nl_onderwijsnummer.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing Onderwijsnummer, the Dutch student identification number. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nl_postcode.py b/dataprep/clean/clean_nl_postcode.py index 6c7475af5..2844c4e63 100644 --- a/dataprep/clean/clean_nl_postcode.py +++ b/dataprep/clean/clean_nl_postcode.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Dutch postal codes. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_no_fodselsnummer.py b/dataprep/clean/clean_no_fodselsnummer.py index e8cdba8c7..3ef88d103 100644 --- a/dataprep/clean/clean_no_fodselsnummer.py +++ b/dataprep/clean/clean_no_fodselsnummer.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Norwegian birth numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_no_iban.py b/dataprep/clean/clean_no_iban.py index 8ab2720d3..1b65c93f1 100644 --- a/dataprep/clean/clean_no_iban.py +++ b/dataprep/clean/clean_no_iban.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Norwegian IBANs (IBANs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_no_kontonr.py b/dataprep/clean/clean_no_kontonr.py index 48fd9ed13..44c38dda5 100644 --- a/dataprep/clean/clean_no_kontonr.py +++ b/dataprep/clean/clean_no_kontonr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Norwegian bank account numbers (kontonrs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_no_mva.py b/dataprep/clean/clean_no_mva.py index 5ee5985f8..425771966 100644 --- a/dataprep/clean/clean_no_mva.py +++ b/dataprep/clean/clean_no_mva.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Norwegian VAT numbers (MVAs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_no_orgnr.py b/dataprep/clean/clean_no_orgnr.py index 83546a5cc..599778654 100644 --- a/dataprep/clean/clean_no_orgnr.py +++ b/dataprep/clean/clean_no_orgnr.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Norwegian organisation numbers (Orgnrs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nz_bankaccount.py b/dataprep/clean/clean_nz_bankaccount.py index 2a2df6f79..440ba62e5 100644 --- a/dataprep/clean/clean_nz_bankaccount.py +++ b/dataprep/clean/clean_nz_bankaccount.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing New Zealand bank account numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_nz_ird.py b/dataprep/clean/clean_nz_ird.py index 69a558f4e..7687905ee 100644 --- a/dataprep/clean/clean_nz_ird.py +++ b/dataprep/clean/clean_nz_ird.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing New Zealand IRD numbers (IRDs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_pe_cui.py b/dataprep/clean/clean_pe_cui.py index a7368a5c4..7b7c500bc 100644 --- a/dataprep/clean/clean_pe_cui.py +++ b/dataprep/clean/clean_pe_cui.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Peruvian personal numbers (CUIs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_pe_ruc.py b/dataprep/clean/clean_pe_ruc.py index 62e50973f..6523f0208 100644 --- a/dataprep/clean/clean_pe_ruc.py +++ b/dataprep/clean/clean_pe_ruc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Peruvian fiscal numbers (RUCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_phone.py b/dataprep/clean/clean_phone.py index 81d1ff9fb..4aa37c339 100644 --- a/dataprep/clean/clean_phone.py +++ b/dataprep/clean/clean_phone.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing phone numbers. """ + import re from operator import itemgetter from typing import Any, Union diff --git a/dataprep/clean/clean_pl_nip.py b/dataprep/clean/clean_pl_nip.py index abeb64bfb..9a1ab42e9 100644 --- a/dataprep/clean/clean_pl_nip.py +++ b/dataprep/clean/clean_pl_nip.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Polish VAT numbers (NIPs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_pl_pesel.py b/dataprep/clean/clean_pl_pesel.py index be67d6e65..358dd3aba 100644 --- a/dataprep/clean/clean_pl_pesel.py +++ b/dataprep/clean/clean_pl_pesel.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Polish national identification numbers (PESELs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_pl_regon.py b/dataprep/clean/clean_pl_regon.py index 20bb20505..5ed2db9da 100644 --- a/dataprep/clean/clean_pl_regon.py +++ b/dataprep/clean/clean_pl_regon.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Polish register of economic units (REGONs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_pt_nif.py b/dataprep/clean/clean_pt_nif.py index 9068631e6..562e4416e 100644 --- a/dataprep/clean/clean_pt_nif.py +++ b/dataprep/clean/clean_pt_nif.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Portuguese NIF numbers (NIFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_py_ruc.py b/dataprep/clean/clean_py_ruc.py index f9f52d1a2..ac36cef4b 100644 --- a/dataprep/clean/clean_py_ruc.py +++ b/dataprep/clean/clean_py_ruc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Paraguay RUC numbers (RUCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ro_cf.py b/dataprep/clean/clean_ro_cf.py index f42527b7f..8784832be 100644 --- a/dataprep/clean/clean_ro_cf.py +++ b/dataprep/clean/clean_ro_cf.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Romanian CF (VAT) numbers (CFs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ro_cnp.py b/dataprep/clean/clean_ro_cnp.py index 8d42e76b0..e131001f2 100644 --- a/dataprep/clean/clean_ro_cnp.py +++ b/dataprep/clean/clean_ro_cnp.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Romanian Numerical Personal Codes (CNPs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ro_cui.py b/dataprep/clean/clean_ro_cui.py index 37db5a19c..5d1a5d09d 100644 --- a/dataprep/clean/clean_ro_cui.py +++ b/dataprep/clean/clean_ro_cui.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Romanian company identifiers (CUIs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_ro_onrc.py b/dataprep/clean/clean_ro_onrc.py index cce5aa4d6..251c018cf 100644 --- a/dataprep/clean/clean_ro_onrc.py +++ b/dataprep/clean/clean_ro_onrc.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing Romanian Trade Register identifiers (ONRCs). """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/clean_text.py b/dataprep/clean/clean_text.py index f769539e9..82893922f 100644 --- a/dataprep/clean/clean_text.py +++ b/dataprep/clean/clean_text.py @@ -1,6 +1,7 @@ """ Clean a DataFrame column containing text data. """ + import re import string from functools import partial, update_wrapper diff --git a/dataprep/clean/clean_url.py b/dataprep/clean/clean_url.py index 34a4f5a13..a1d1fd23f 100644 --- a/dataprep/clean/clean_url.py +++ b/dataprep/clean/clean_url.py @@ -1,6 +1,7 @@ """ Clean and validate a DataFrame column containing URLs. """ + import re from operator import itemgetter from typing import Any, List, Union diff --git a/dataprep/clean/clean_vatin.py b/dataprep/clean/clean_vatin.py index 36bdf6873..9640c3895 100644 --- a/dataprep/clean/clean_vatin.py +++ b/dataprep/clean/clean_vatin.py @@ -2,6 +2,7 @@ Clean and validate a DataFrame column containing International value added tax identification numbers. """ + # pylint: disable=too-many-lines, too-many-arguments, too-many-branches, unused-argument, E1101, E1133 from typing import Any, Union from operator import itemgetter diff --git a/dataprep/clean/components/cat_encoder.py b/dataprep/clean/components/cat_encoder.py index 1c8e4d62e..1329ef32a 100644 --- a/dataprep/clean/components/cat_encoder.py +++ b/dataprep/clean/components/cat_encoder.py @@ -9,7 +9,6 @@ class CatEncoder: - """Categorical encoder for encoding categorical columns Attributes: encode_type diff --git a/dataprep/clean/components/cat_imputation/constant_imputer.py b/dataprep/clean/components/cat_imputation/constant_imputer.py index c83433f33..c123767a7 100644 --- a/dataprep/clean/components/cat_imputation/constant_imputer.py +++ b/dataprep/clean/components/cat_imputation/constant_imputer.py @@ -72,7 +72,6 @@ def fit_transform(self, col_df: dd.Series) -> dd.Series: return self.fit(col_df).transform(col_df) def fillna(self, val: str) -> str: - """ Check if the value is in the list of null value. If yes, impute the data column with constant value. diff --git a/dataprep/clean/components/cat_imputer.py b/dataprep/clean/components/cat_imputer.py index ca07d0fe7..1197ef17c 100644 --- a/dataprep/clean/components/cat_imputer.py +++ b/dataprep/clean/components/cat_imputer.py @@ -9,7 +9,6 @@ class CatImputer: - """Categorical imputer for imputing missing values in categorical columns Attributes: impute_type diff --git a/dataprep/clean/components/num_imputer.py b/dataprep/clean/components/num_imputer.py index ee534cce7..feb5718a3 100644 --- a/dataprep/clean/components/num_imputer.py +++ b/dataprep/clean/components/num_imputer.py @@ -20,7 +20,6 @@ class NumImputer: """ def __init__(self, num_pipe_info: Dict[str, Any]) -> None: - """ This function initiate numerical imputer. diff --git a/dataprep/clean/gui/clean_gui.py b/dataprep/clean/gui/clean_gui.py index c11904611..3076d614d 100644 --- a/dataprep/clean/gui/clean_gui.py +++ b/dataprep/clean/gui/clean_gui.py @@ -1,6 +1,7 @@ """ Flask backend of Dataprep.Clean GUI. """ + # pylint: disable=R0912, R0915 from typing import Any diff --git a/dataprep/clean/pipeline.py b/dataprep/clean/pipeline.py index e88d49bf1..da6e14d27 100644 --- a/dataprep/clean/pipeline.py +++ b/dataprep/clean/pipeline.py @@ -11,7 +11,6 @@ class Pipeline: - """Pipeline for managing categorical column and numerical column. Attributes: cat_pipeline diff --git a/dataprep/clean/utils.py b/dataprep/clean/utils.py index 2e19613b9..3360e9675 100644 --- a/dataprep/clean/utils.py +++ b/dataprep/clean/utils.py @@ -1,4 +1,5 @@ """Common functions""" + import http.client import json from math import ceil diff --git a/dataprep/connector/config_manager.py b/dataprep/connector/config_manager.py index 4dacaba1e..b0434e1cf 100644 --- a/dataprep/connector/config_manager.py +++ b/dataprep/connector/config_manager.py @@ -1,6 +1,7 @@ """ Functions for config downloading and maintaining """ + import json from json import dump as jdump from pathlib import Path diff --git a/dataprep/connector/connector.py b/dataprep/connector/connector.py index f0e531350..30ddd453d 100644 --- a/dataprep/connector/connector.py +++ b/dataprep/connector/connector.py @@ -2,6 +2,7 @@ This module contains the Connector class. Every data fetching action should begin with instantiating this Connector class. """ + import math import sys from asyncio import as_completed diff --git a/dataprep/connector/errors.py b/dataprep/connector/errors.py index 7890c447a..5bc7a3c2d 100644 --- a/dataprep/connector/errors.py +++ b/dataprep/connector/errors.py @@ -1,6 +1,7 @@ """ Module defines errors used in this library. """ + from typing import Set from ..errors import DataprepError diff --git a/dataprep/connector/generator/__init__.py b/dataprep/connector/generator/__init__.py index a1a7e08b3..724cae78c 100644 --- a/dataprep/connector/generator/__init__.py +++ b/dataprep/connector/generator/__init__.py @@ -1,4 +1,5 @@ """ConfigGenerator""" + from .generator import ConfigGenerator from .ui import ConfigGeneratorUI diff --git a/dataprep/connector/generator/ui.py b/dataprep/connector/generator/ui.py index b356a9f9b..922f7bcb0 100644 --- a/dataprep/connector/generator/ui.py +++ b/dataprep/connector/generator/ui.py @@ -1,6 +1,5 @@ """This module implements the generation of connector config generation UI.""" - from base64 import b64encode from typing import Any, Dict, Generator, Optional, Tuple from zipfile import ZipFile @@ -206,9 +205,11 @@ def _on_send_request(self, _: Any) -> None: "method": self.request_type.value, "params": params_value, "pagination": pagparams, - "authorization": (authparams, authparams_user) - if self.authtype_box.value != "No Authorization" - else None, + "authorization": ( + (authparams, authparams_user) + if self.authtype_box.value != "No Authorization" + else None + ), } backend = ConfigGenerator(self.existing) diff --git a/dataprep/connector/implicit_database.py b/dataprep/connector/implicit_database.py index aca901e85..651bc6877 100644 --- a/dataprep/connector/implicit_database.py +++ b/dataprep/connector/implicit_database.py @@ -3,6 +3,7 @@ where ImplicitDatabase is a conceptual model describes a website and ImplicitTable describes an API endpoint. """ + from json import load as jload from json import loads as jloads from pathlib import Path diff --git a/dataprep/connector/info.py b/dataprep/connector/info.py index ed62edf39..c8d07d255 100644 --- a/dataprep/connector/info.py +++ b/dataprep/connector/info.py @@ -1,4 +1,5 @@ """This module contains back end functions helping developers use data connector.""" + from typing import Any, Dict, List import pandas as pd diff --git a/dataprep/connector/info_ui.py b/dataprep/connector/info_ui.py index b88ca9431..6dbe98d5a 100644 --- a/dataprep/connector/info_ui.py +++ b/dataprep/connector/info_ui.py @@ -1,4 +1,5 @@ """This module handles displaying information on how to connect and query.""" + from typing import Any, Dict from jinja2 import Environment, PackageLoader, select_autoescape from ..utils import display_html diff --git a/dataprep/connector/schema/defs.py b/dataprep/connector/schema/defs.py index 662b1cbd0..d564f5378 100644 --- a/dataprep/connector/schema/defs.py +++ b/dataprep/connector/schema/defs.py @@ -1,4 +1,5 @@ """Strong typed schema definition.""" + import http.server import json import random diff --git a/dataprep/connector/sql.py b/dataprep/connector/sql.py index 00b88262c..094dde57b 100644 --- a/dataprep/connector/sql.py +++ b/dataprep/connector/sql.py @@ -2,6 +2,7 @@ This module contains the method of read_sql. It is a wrapper on connectorx.read_sql function. """ + from typing import Optional, Tuple, Union, List, Any try: diff --git a/dataprep/connector/throttler.py b/dataprep/connector/throttler.py index 059be1d93..c4fb2e714 100644 --- a/dataprep/connector/throttler.py +++ b/dataprep/connector/throttler.py @@ -2,6 +2,7 @@ Throttler limits how many requests can issue given a specific time window Copied from https://github.com/hallazzang/asyncio-throttle """ + import time import asyncio from collections import deque diff --git a/dataprep/connector/utils.py b/dataprep/connector/utils.py index 7b4381452..7bcadee20 100644 --- a/dataprep/connector/utils.py +++ b/dataprep/connector/utils.py @@ -1,6 +1,7 @@ """ This module contains common utilities used by the connector """ + from typing import Any, Dict, Optional import http.client import urllib.parse diff --git a/dataprep/eda/__init__.py b/dataprep/eda/__init__.py index 593f8a67b..3b6765940 100644 --- a/dataprep/eda/__init__.py +++ b/dataprep/eda/__init__.py @@ -2,6 +2,7 @@ dataprep.eda ============ """ + from bokeh.io import output_notebook from ..utils import is_notebook diff --git a/dataprep/eda/correlation/render.py b/dataprep/eda/correlation/render.py index 2f185ca2a..e3b541517 100644 --- a/dataprep/eda/correlation/render.py +++ b/dataprep/eda/correlation/render.py @@ -2,6 +2,7 @@ This module implements the visualization for plot_correlation(df) function """ + from typing import Any, Dict, List, Sequence, Tuple import numpy as np @@ -93,6 +94,7 @@ def render_correlation(itmdt: Intermediate, cfg: Config) -> Any: # _discard_unused_visual_elems(fig) # return fig + ########## HeatMaps ########## def tweak_figure(fig: Figure) -> None: """ diff --git a/dataprep/eda/create_db_report/__init__.py b/dataprep/eda/create_db_report/__init__.py index 5b7943c3f..1cf5b7102 100644 --- a/dataprep/eda/create_db_report/__init__.py +++ b/dataprep/eda/create_db_report/__init__.py @@ -1,6 +1,7 @@ """ This module implements the create_db_report(sql_engine) function. """ + import warnings from sqlalchemy.engine.base import Engine from .run_function import generate_db_report diff --git a/dataprep/eda/create_db_report/report.py b/dataprep/eda/create_db_report/report.py index 1f35838ca..fe9948f55 100644 --- a/dataprep/eda/create_db_report/report.py +++ b/dataprep/eda/create_db_report/report.py @@ -1,6 +1,7 @@ """ This module implements the Report class for create_db_report. """ + import os import shutil import warnings diff --git a/dataprep/eda/create_db_report/run_function.py b/dataprep/eda/create_db_report/run_function.py index 316a994cd..f377c3db6 100644 --- a/dataprep/eda/create_db_report/run_function.py +++ b/dataprep/eda/create_db_report/run_function.py @@ -121,9 +121,11 @@ def parse_tables( column["type"], str(column["attnotnull"]).upper() == "TRUE", column["default"], - str(column["auto_increment"]).upper() == "TRUE" - if "auto_increment" in column - else False, + ( + str(column["auto_increment"]).upper() == "TRUE" + if "auto_increment" in column + else False + ), column["description"] if "description" in column else "", ) current_table.add_column(c.upper(), create_table_column) @@ -144,9 +146,11 @@ def parse_tables( column["type"], column["attnotnull"] == "True", column["default"], - str(column["auto_increment"]).upper() == "TRUE" - if "auto_increment" in column - else False, + ( + str(column["auto_increment"]).upper() == "TRUE" + if "auto_increment" in column + else False + ), column["description"] if "description" in column else "", ) collect_columns[c.upper()] = create_view_column diff --git a/dataprep/eda/create_diff_report/__init__.py b/dataprep/eda/create_diff_report/__init__.py index e48d6f8a5..27199be11 100644 --- a/dataprep/eda/create_diff_report/__init__.py +++ b/dataprep/eda/create_diff_report/__init__.py @@ -1,6 +1,7 @@ """ This module implements the create_diff_report([df1, df2]) function. """ + import warnings from typing import Any, Dict, List, Optional, Union diff --git a/dataprep/eda/create_report/__init__.py b/dataprep/eda/create_report/__init__.py index 593608691..61b72a0b3 100644 --- a/dataprep/eda/create_report/__init__.py +++ b/dataprep/eda/create_report/__init__.py @@ -1,6 +1,7 @@ """ This module implements the create_report(df) function. """ + import warnings from typing import Any, Dict, List, Optional diff --git a/dataprep/eda/create_report/report.py b/dataprep/eda/create_report/report.py index 457c44993..387480572 100644 --- a/dataprep/eda/create_report/report.py +++ b/dataprep/eda/create_report/report.py @@ -1,6 +1,7 @@ """ This module implements the Report class. """ + import sys import webbrowser from pathlib import Path diff --git a/dataprep/eda/diff/render.py b/dataprep/eda/diff/render.py index 7a49bd679..201565499 100644 --- a/dataprep/eda/diff/render.py +++ b/dataprep/eda/diff/render.py @@ -1,6 +1,7 @@ """ This module implements the visualization for the plot_diff function. """ # pylint: disable=too-many-lines + from typing import Any, Dict, List, Tuple, Optional import math diff --git a/dataprep/eda/distribution/compute/__init__.py b/dataprep/eda/distribution/compute/__init__.py index e7fc309b8..fd867b50f 100644 --- a/dataprep/eda/distribution/compute/__init__.py +++ b/dataprep/eda/distribution/compute/__init__.py @@ -2,7 +2,6 @@ Computations for plot(df, ...) """ - import warnings from typing import Optional, Union, List, Dict, Any, Tuple import dask.dataframe as dd diff --git a/dataprep/eda/distribution/compute/bivariate.py b/dataprep/eda/distribution/compute/bivariate.py index 7716e3df6..97dd37d39 100644 --- a/dataprep/eda/distribution/compute/bivariate.py +++ b/dataprep/eda/distribution/compute/bivariate.py @@ -1,4 +1,5 @@ """Computations for plot(df, x, y).""" + from typing import Any, Dict, List, Optional, Tuple, Union import dask diff --git a/dataprep/eda/distribution/compute/overview.py b/dataprep/eda/distribution/compute/overview.py index cd41080e8..47dfc0144 100644 --- a/dataprep/eda/distribution/compute/overview.py +++ b/dataprep/eda/distribution/compute/overview.py @@ -229,7 +229,7 @@ def _format_ov_ins(data: Dict[str, Any], cfg: Config) -> List[Dict[str, str]]: ndup = data["nrows"] - data["nrows_wo_dups"] ins.append({"Duplicates": f"Dataset has {ndup} ({pdup}%) duplicate rows"}) - for (*cols, test_result) in data.get("ks_tests", []): + for *cols, test_result in data.get("ks_tests", []): if test_result > cfg.insight.similar_distribution__threshold: msg = f"/*start*/{cols[0]}/*end*/ and /*start*/{cols[1]}/*end*/ have similar distributions" ins.append({"Similar Distribution": msg}) diff --git a/dataprep/eda/distribution/render.py b/dataprep/eda/distribution/render.py index 478b75292..2048aa1ba 100644 --- a/dataprep/eda/distribution/render.py +++ b/dataprep/eda/distribution/render.py @@ -1,6 +1,7 @@ """ This module implements the visualization for the plot(df) function. """ # pylint: disable=too-many-lines + from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union import json diff --git a/dataprep/eda/dtypes.py b/dataprep/eda/dtypes.py index 820ddbd05..614c5dc82 100644 --- a/dataprep/eda/dtypes.py +++ b/dataprep/eda/dtypes.py @@ -1,6 +1,7 @@ """ In this module lives the type tree. """ + from collections import defaultdict from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Type, Union diff --git a/dataprep/eda/dtypes_v2.py b/dataprep/eda/dtypes_v2.py index 376a69dde..022b461c1 100644 --- a/dataprep/eda/dtypes_v2.py +++ b/dataprep/eda/dtypes_v2.py @@ -1,6 +1,7 @@ """ In this module lives the type tree. """ + from typing import Any, Dict, Optional, Type, Union import dask.dataframe as dd diff --git a/dataprep/eda/intermediate.py b/dataprep/eda/intermediate.py index 331e4fb0b..64bd4dc0a 100644 --- a/dataprep/eda/intermediate.py +++ b/dataprep/eda/intermediate.py @@ -1,6 +1,7 @@ """ Intermediate class """ + from typing import Any, Dict, Tuple, Union, Optional from pathlib import Path diff --git a/dataprep/eda/missing/compute/__init__.py b/dataprep/eda/missing/compute/__init__.py index 887303ab6..3588046fb 100644 --- a/dataprep/eda/missing/compute/__init__.py +++ b/dataprep/eda/missing/compute/__init__.py @@ -1,6 +1,7 @@ """This module implements the plot_missing(df) function's calculating intermediate part """ + from typing import Optional, cast, List, Any, Dict, Union import warnings from scipy.cluster.hierarchy import ClusterWarning diff --git a/dataprep/eda/missing/compute/common.py b/dataprep/eda/missing/compute/common.py index fb17c96a6..ddbb567f3 100644 --- a/dataprep/eda/missing/compute/common.py +++ b/dataprep/eda/missing/compute/common.py @@ -1,4 +1,5 @@ """Common parts for compute missing.""" + from typing import Optional, Tuple import dask.array as da diff --git a/dataprep/eda/missing/compute/nullivariate.py b/dataprep/eda/missing/compute/nullivariate.py index 316d5ad88..1597a98c8 100644 --- a/dataprep/eda/missing/compute/nullivariate.py +++ b/dataprep/eda/missing/compute/nullivariate.py @@ -1,6 +1,7 @@ """This module implements the plot_missing(df) function's calculating intermediate part """ + from typing import Any, Callable, Dict, Generator, List, Optional, Tuple import dask.array as da diff --git a/dataprep/eda/missing/compute/univariate.py b/dataprep/eda/missing/compute/univariate.py index 9dc7dd527..6d461bb18 100644 --- a/dataprep/eda/missing/compute/univariate.py +++ b/dataprep/eda/missing/compute/univariate.py @@ -1,6 +1,7 @@ """This module implements the plot_missing(df, x) function's calculating intermediate part """ + from typing import Any, Generator, List import numpy as np diff --git a/dataprep/eda/missing/render.py b/dataprep/eda/missing/render.py index 4e6014f8f..2c9bb3093 100644 --- a/dataprep/eda/missing/render.py +++ b/dataprep/eda/missing/render.py @@ -2,6 +2,7 @@ This module implements the plot_missing(df, x, y) function's visualization part. """ + from typing import Any, Dict, List, Optional, Sequence, Tuple import numpy as np diff --git a/dataprep/eda/outlier/computation.py b/dataprep/eda/outlier/computation.py index ebf4ec08b..6960bf814 100644 --- a/dataprep/eda/outlier/computation.py +++ b/dataprep/eda/outlier/computation.py @@ -2,7 +2,6 @@ Module containing plot_outlier function. """ - import dask.dataframe as dd from ..intermediate import Intermediate diff --git a/dataprep/eda/palette.py b/dataprep/eda/palette.py index 96b70108e..ad7ffa59b 100644 --- a/dataprep/eda/palette.py +++ b/dataprep/eda/palette.py @@ -1,6 +1,7 @@ """ This file defines palettes used for EDA. """ + from bokeh.palettes import Category10, Category20, Greys256, Pastel1, viridis BRG = ["#1f78b4", "#d62728", "#2ca02c"] diff --git a/dataprep/eda/utils.py b/dataprep/eda/utils.py index 75dd0364f..f17fe0e03 100644 --- a/dataprep/eda/utils.py +++ b/dataprep/eda/utils.py @@ -1,5 +1,6 @@ """Miscellaneous functions """ + import logging from math import ceil from typing import Any, Dict, List, Optional, Tuple, Union, cast diff --git a/dataprep/lineage/lx.py b/dataprep/lineage/lx.py index 42ab0b3f9..ce315c96a 100644 --- a/dataprep/lineage/lx.py +++ b/dataprep/lineage/lx.py @@ -2,6 +2,7 @@ This module contains the method of lineagex. It is a wrapper on lineagex.lineagex function. """ + from typing import Optional, Union, List try: diff --git a/dataprep/progress_bar.py b/dataprep/progress_bar.py index b6b92ea9f..f98f4e4ca 100644 --- a/dataprep/progress_bar.py +++ b/dataprep/progress_bar.py @@ -1,4 +1,5 @@ """ProgressBar shows the how many dask tasks finished/remains using tqdm.""" + import warnings from time import time from typing import Any, Dict, Optional, Tuple, Union @@ -12,6 +13,7 @@ else: from tqdm import tqdm + # pylint: disable=method-hidden,too-many-instance-attributes class ProgressBar(Callback): # type: ignore """A progress bar for DataPrep.EDA. diff --git a/dataprep/tests/benchmarks/eda.py b/dataprep/tests/benchmarks/eda.py index 791c0d8d3..7e516c93b 100644 --- a/dataprep/tests/benchmarks/eda.py +++ b/dataprep/tests/benchmarks/eda.py @@ -1,6 +1,7 @@ """ This module is for performance testing of EDA module in github action. """ + from functools import partial import pandas as pd from typing import Any diff --git a/dataprep/tests/clean/test_clean_address.py b/dataprep/tests/clean/test_clean_address.py index c1e15f64f..a10c7418b 100644 --- a/dataprep/tests/clean/test_clean_address.py +++ b/dataprep/tests/clean/test_clean_address.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_address() and validate_address() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_country.py b/dataprep/tests/clean/test_clean_country.py index c2d7bc59b..49f30bb06 100644 --- a/dataprep/tests/clean/test_clean_country.py +++ b/dataprep/tests/clean/test_clean_country.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_country() and validate_country() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_date.py b/dataprep/tests/clean/test_clean_date.py index 5f9da8c06..5b0144b25 100644 --- a/dataprep/tests/clean/test_clean_date.py +++ b/dataprep/tests/clean/test_clean_date.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_country() and validate_country() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_email.py b/dataprep/tests/clean/test_clean_email.py index 57299ffe6..a3e48984b 100755 --- a/dataprep/tests/clean/test_clean_email.py +++ b/dataprep/tests/clean/test_clean_email.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_email() and validate_email() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_headers.py b/dataprep/tests/clean/test_clean_headers.py index 2b13ea4b8..995cf496e 100644 --- a/dataprep/tests/clean/test_clean_headers.py +++ b/dataprep/tests/clean/test_clean_headers.py @@ -1,6 +1,7 @@ """ module for testing the function clean_headers() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_lat_long.py b/dataprep/tests/clean/test_clean_lat_long.py index ddcb27930..1b9bc97ba 100644 --- a/dataprep/tests/clean/test_clean_lat_long.py +++ b/dataprep/tests/clean/test_clean_lat_long.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_lat_long() and validate_lat_long() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_phone.py b/dataprep/tests/clean/test_clean_phone.py index d96081b24..578799119 100644 --- a/dataprep/tests/clean/test_clean_phone.py +++ b/dataprep/tests/clean/test_clean_phone.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_phone() and validate_phone() """ + import logging import numpy as np diff --git a/dataprep/tests/clean/test_clean_text.py b/dataprep/tests/clean/test_clean_text.py index fcb791b6d..76e8b5cc3 100644 --- a/dataprep/tests/clean/test_clean_text.py +++ b/dataprep/tests/clean/test_clean_text.py @@ -1,6 +1,7 @@ """ module for testing the functions clean_text() and default_text_pipeline() """ + import re import logging from typing import Any, Dict, List diff --git a/dataprep/tests/datasets/test_datasets.py b/dataprep/tests/datasets/test_datasets.py index ca066d6f4..3a0128b8a 100644 --- a/dataprep/tests/datasets/test_datasets.py +++ b/dataprep/tests/datasets/test_datasets.py @@ -1,6 +1,7 @@ """ module for testing the functions inside datasets """ + from ...datasets import get_dataset_names, get_db_names, load_dataset, load_db diff --git a/dataprep/tests/eda/test_config.py b/dataprep/tests/eda/test_config.py index 306ed21b2..3d3c8faf3 100644 --- a/dataprep/tests/eda/test_config.py +++ b/dataprep/tests/eda/test_config.py @@ -1,6 +1,7 @@ """ This module for testing config parameter """ + import dask.dataframe as dd import pandas as pd import numpy as np diff --git a/dataprep/tests/eda/test_create_diff_report.py b/dataprep/tests/eda/test_create_diff_report.py index 64f1b4092..828681f4a 100644 --- a/dataprep/tests/eda/test_create_diff_report.py +++ b/dataprep/tests/eda/test_create_diff_report.py @@ -1,6 +1,7 @@ """ module for testing create_diff_report(df) function. """ + import logging import numpy as np import pandas as pd diff --git a/dataprep/tests/eda/test_create_report.py b/dataprep/tests/eda/test_create_report.py index 97a536906..28260c4ac 100644 --- a/dataprep/tests/eda/test_create_report.py +++ b/dataprep/tests/eda/test_create_report.py @@ -1,6 +1,7 @@ """ module for testing create_report(df) function. """ + import logging import numpy as np import pandas as pd diff --git a/dataprep/tests/eda/test_plot.py b/dataprep/tests/eda/test_plot.py index cad65e10e..37a2e7d11 100644 --- a/dataprep/tests/eda/test_plot.py +++ b/dataprep/tests/eda/test_plot.py @@ -1,6 +1,7 @@ """ module for testing plot(df, x, y) function. """ + import logging import dask.dataframe as dd diff --git a/dataprep/tests/eda/test_plot_correlation.py b/dataprep/tests/eda/test_plot_correlation.py index 4f483bc06..7a4af926c 100644 --- a/dataprep/tests/eda/test_plot_correlation.py +++ b/dataprep/tests/eda/test_plot_correlation.py @@ -1,6 +1,7 @@ """ module for testing plot_corr(df, x, y) function. """ + import random from time import time diff --git a/dataprep/tests/eda/test_plot_diff.py b/dataprep/tests/eda/test_plot_diff.py index 3ad059faa..e7703168f 100644 --- a/dataprep/tests/eda/test_plot_diff.py +++ b/dataprep/tests/eda/test_plot_diff.py @@ -1,6 +1,7 @@ """ module for testing plot_diff([df1, df2, ..., dfn]) function. """ + import logging import dask.dataframe as dd diff --git a/dataprep/tests/eda/test_plot_missing.py b/dataprep/tests/eda/test_plot_missing.py index 532d0e8d5..b7a215ac2 100644 --- a/dataprep/tests/eda/test_plot_missing.py +++ b/dataprep/tests/eda/test_plot_missing.py @@ -1,6 +1,7 @@ """ This module for testing plot_missing(df, x, y) function. """ + import dask.dataframe as dd import numpy as np import pandas as pd diff --git a/dataprep/tests/lineage/test_lineagex.py b/dataprep/tests/lineage/test_lineagex.py index 31f483e36..6c24419cc 100644 --- a/dataprep/tests/lineage/test_lineagex.py +++ b/dataprep/tests/lineage/test_lineagex.py @@ -12,18 +12,11 @@ ) def test_read_sql() -> None: db_url = environ["DB_URL"] - sql = os.path.join(os.getcwd(), 'dependency_example') - lx = lineagex( - sql, - "mimiciii_derived", - db_url, - "mimiciii_clinical, public" - ) + sql = os.path.join(os.getcwd(), "dependency_example") + lx = lineagex(sql, "mimiciii_derived", db_url, "mimiciii_clinical, public") print("dependency test with database connection", lx) lx = lineagex( - sql=sql, - target_schema="mimiciii_derived", - search_path_schema="mimiciii_clinical, public" + sql=sql, target_schema="mimiciii_derived", search_path_schema="mimiciii_clinical, public" ) print("dependency test without database connection", lx) diff --git a/dataprep/utils.py b/dataprep/utils.py index e3f89cdef..56ed9fe98 100644 --- a/dataprep/utils.py +++ b/dataprep/utils.py @@ -1,4 +1,5 @@ """Utility functions used by the whole library.""" + from typing import Any import webbrowser from tempfile import NamedTemporaryFile diff --git a/docs/source/user_guide/eda/house_price.ipynb b/docs/source/user_guide/eda/house_price.ipynb index 5e8af7391..23616fbea 100644 --- a/docs/source/user_guide/eda/house_price.ipynb +++ b/docs/source/user_guide/eda/house_price.ipynb @@ -2,13 +2,14 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "# EDA Case Study: House Price" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Task Description\n", "House Prices is a classical Kaggle competition. The task is to predicts final price of each house. For more detail, refer to https://www.kaggle.com/c/house-prices-advanced-regression-techniques/.\n", @@ -21,19 +22,20 @@ "* **Univariable study**. We'll just focus on the dependent variable ('SalePrice') and try to know a little bit more about it.\n", "* **Multivariate study**. We'll try to understand how the dependent variable and independent variables relate.\n", "* **Basic cleaning**. We'll clean the dataset and handle the missing data, outliers and categorical variables." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Import libraries" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from dataprep.eda import plot\n", "from dataprep.eda import plot_correlation\n", @@ -47,80 +49,79 @@ "import seaborn as sns\n", "sns.set(style=\"whitegrid\", color_codes=True)\n", "sns.set(font_scale=1)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Load data" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "houses = load_dataset(\"house_prices_train\")\n", "houses.head()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "houses_test = load_dataset(\"house_prices_test\")\n", "houses_test.head()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "houses.shape" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "There are total 1460 tuples, each tuple contains 80 features and 1 target value." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "houses_test.shape" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Variable identification" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Overview of the data\n", "We could get the following information:\n", @@ -129,36 +130,36 @@ "* **Missing value**-How many missing values each column contains. For instance, Fence contains 80.8% * 1460 = 1180 missing tuples. Usually, some model does not allow the input data contains missing value such as SVM, we have to clean the data before we utilize it.\n", "* **Target Value**-The distribution of target value (SalePrice). According to the distribution of the target value, we could get the information that the target value is numerical and the distribution of the target value conforms to the norm distribution. Thus, we are not confronted with imbalanced classes problem. It is really great.\n", "* **Guess**-According to the columns' name, we reckon GrLivArea, YearBuilt and OverallQual are likely to be correlated to the target value (SalePrice)." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Correlation in data" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses, \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses, \"SalePrice\", value_range=[0.5, 1])" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "OverallQual, GrLivArea, GarageCars, GarageArea, TotalBsmtSF, 1stFlrSF, FullBath, TotRmsAbvGrd, YearBuilt, YearRemodAdd have more than 0.5 Pearson correlation with SalePrice.\n", "\n", @@ -169,27 +170,27 @@ "EnclosedPorch and KitchenAbvGr have little negative correlation with target variable.\n", "\n", "These can prove to be important features to predict SalePrice." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Heatmap" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### In summary\n", "In my opinion, this heatmap is the best way to get a quick overview of features' relationships.\n", @@ -197,22 +198,22 @@ "At first sight, there are two red colored squares that get my attention. The first one refers to the 'TotalBsmtSF' and '1stFlrSF' variables, and the second one refers to the 'GarageX' variables. Both cases show how significant the correlation is between these variables. Actually, this correlation is so strong that it can indicate a situation of multicollinearity. If we think about these variables, we can conclude that they give almost the same information so multicollinearity really occurs. Heatmaps are great to detect this kind of situations and in problems dominated by feature selection, like ours, they are an essential tool.\n", "\n", "Another thing that got my attention was the 'SalePrice' correlations. We can see our well-known 'GrLivArea', 'TotalBsmtSF', and 'OverallQual', but we can also see many other variables that should be taken into account. That's what we will do next." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses[[\"SalePrice\",\"OverallQual\",\"GrLivArea\",\"GarageCars\",\n", " \"GarageArea\",\"GarageYrBlt\",\"TotalBsmtSF\",\"1stFlrSF\",\"FullBath\",\n", " \"TotRmsAbvGrd\",\"YearBuilt\",\"YearRemodAdd\"]])" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "As we saw above there are few feature which shows high multicollinearity from heatmap. Lets focus on red squares on diagonal line and few on the sides.\n", "\n", @@ -227,29 +228,29 @@ "YearBulit and GarageYrBlt\n", "\n", "We have to create a single feature from them before we use them as predictors." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses, value_range=[0.5, 1])" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses, k=30)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "**Attribute Pair\tCorrelation**\n", "\n", @@ -321,98 +322,98 @@ "Make new feature by adding them or by some other operation.\n", "Use PCA, which will reduce feature set to small number of non-collinear features.\n", "Reference:http://blog.minitab.com/blog/understanding-statistics/handling-multicollinearity-in-regression-analysis" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Univariate Analysis\n", "How 1 single variable is distributed in numeric range. What is statistical summary of it. Is it positively skewed or negatively." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Pivotal Features" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_correlation(houses, \"OverallQual\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"OverallQual\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"GarageCars\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"Fireplaces\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"GrLivArea\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"TotalBsmtSF\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot(houses, \"YearBuilt\", \"SalePrice\")" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### In summary\n", "Based on the above analysis, we can conclude that:\n", @@ -422,11 +423,11 @@ "We just analysed four variables, but there are many other that we should analyse. The trick here seems to be the choice of the right features (feature selection) and not the definition of complex relationships between them (feature engineering).\n", "\n", "That said, let's separate the wheat from the chaff." - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Missing Value Imputation\n", "Missing values in the training data set can affect prediction or classification of a model negatively.\n", @@ -436,87 +437,86 @@ "But filling missing values with mean/median/mode or using another predictive model to predict missing values is also a prediction which may not be 100% accurate, instead you can use models like Decision Trees and Random Forest which handle missing values very well.\n", "\n", "Some of this part is based on this kernel: https://www.kaggle.com/bisaria/house-prices-advanced-regression-techniques/handling-missing-data" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "plot_missing(houses)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# plot_missing(houses, \"BsmtQual\")\n", "basement_cols=['BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2','BsmtFinSF1','BsmtFinSF2']\n", "houses[basement_cols][houses['BsmtQual'].isnull()==True]" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "All categorical variables contains NAN whereas continuous ones have 0. So that means there is no basement for those houses. we can replace it with 'None'." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for col in basement_cols:\n", " if 'FinSF'not in col:\n", " houses[col] = houses[col].fillna('None')" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# plot_missing(houses, \"FireplaceQu\")\n", "houses[\"FireplaceQu\"] = houses[\"FireplaceQu\"].fillna('None')\n", "pd.crosstab(houses.Fireplaces, houses.FireplaceQu)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "garage_cols=['GarageType','GarageQual','GarageCond','GarageYrBlt','GarageFinish','GarageCars','GarageArea']\n", "houses[garage_cols][houses['GarageType'].isnull()==True]" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "All garage related features are missing values in same rows. that means we can replace categorical variables with None and continuous ones with 0." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "for col in garage_cols:\n", - " if houses[col].dtype==np.object:\n", + " if houses[col].dtype== object:\n", " houses[col] = houses[col].fillna('None')\n", " else:\n", " houses[col] = houses[col].fillna(0)" - ], - "outputs": [], - "metadata": {} + ] } ], "metadata": { @@ -583,4 +583,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +}